@powersync/service-module-mongodb 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ import { MongoLSN } from '../common/MongoLSN.js';
7
7
  import { PostImagesOption } from '../types/types.js';
8
8
  import { escapeRegExp } from '../utils.js';
9
9
  import { createCheckpoint, getCacheIdentifier, getMongoRelation, STANDALONE_CHECKPOINT_ID } from './MongoRelation.js';
10
- import { ChunkedSnapshotQuery } from './MongoSnapshotQuery.js';
10
+ import { MongoSnapshotter } from './MongoSnapshotter.js';
11
11
  import { parseChangeDocument, rawChangeStream } from './RawChangeStream.js';
12
12
  import { CHECKPOINTS_COLLECTION, timestampToDate } from './replication-utils.js';
13
13
  import { DirectSourceRowConverter } from './SourceRowConverter.js';
@@ -34,13 +34,20 @@ export class ChangeStream {
34
34
  defaultDb;
35
35
  metrics;
36
36
  maxAwaitTimeMS;
37
- abort_signal;
37
+ abortController = new AbortController();
38
+ abortSignal = this.abortController.signal;
39
+ initPromise = null;
40
+ snapshotter;
41
+ /**
42
+ * We use the relationCache _only_ for caching static SourceTable info, not for snapshot status.
43
+ */
38
44
  relationCache = new RelationCache(getCacheIdentifier);
39
45
  replicationLag = new ReplicationLagTracker();
40
46
  checkpointStreamId = new mongo.ObjectId();
41
47
  logger;
42
48
  snapshotChunkLength;
43
49
  changeStreamTimeout;
50
+ storageHooks;
44
51
  sourceRowConverter;
45
52
  constructor(options) {
46
53
  this.storage = options.storage;
@@ -49,6 +56,7 @@ export class ChangeStream {
49
56
  this.connections = options.connections;
50
57
  this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 10_000;
51
58
  this.snapshotChunkLength = options.snapshotChunkLength ?? 6_000;
59
+ this.storageHooks = options.storageHooks;
52
60
  this.client = this.connections.client;
53
61
  this.defaultDb = this.connections.db;
54
62
  this.sync_rules = options.storage.getParsedSyncRules({
@@ -58,14 +66,22 @@ export class ChangeStream {
58
66
  // The change stream aggregation command should timeout before the socket times out,
59
67
  // so we use 90% of the socket timeout value.
60
68
  this.changeStreamTimeout = Math.ceil(this.client.options.socketTimeoutMS * 0.9);
61
- this.abort_signal = options.abort_signal;
62
- this.abort_signal.addEventListener('abort', () => {
63
- // TODO: Fast abort?
64
- }, { once: true });
65
69
  this.logger = options.logger ?? this.storage.logger;
70
+ this.snapshotter = new MongoSnapshotter({
71
+ ...options,
72
+ abortSignal: this.abortSignal,
73
+ logger: this.logger,
74
+ checkpointStreamId: this.checkpointStreamId
75
+ });
76
+ options.abort_signal.addEventListener('abort', () => {
77
+ this.abortController.abort(options.abort_signal.reason);
78
+ }, { once: true });
79
+ if (options.abort_signal.aborted) {
80
+ this.abortController.abort(options.abort_signal.reason);
81
+ }
66
82
  }
67
83
  get stopped() {
68
- return this.abort_signal.aborted;
84
+ return this.abortSignal.aborted;
69
85
  }
70
86
  get usePostImages() {
71
87
  return this.connections.options.postImages != PostImagesOption.OFF;
@@ -73,229 +89,6 @@ export class ChangeStream {
73
89
  get configurePostImages() {
74
90
  return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE;
75
91
  }
76
- /**
77
- * This resolves a pattern, persists the related metadata, and returns
78
- * the resulting SourceTables.
79
- *
80
- * This implicitly checks the collection postImage configuration.
81
- */
82
- async resolveQualifiedTableNames(batch, tablePattern) {
83
- const schema = tablePattern.schema;
84
- if (tablePattern.connectionTag != this.connections.connectionTag) {
85
- return [];
86
- }
87
- let nameFilter;
88
- if (tablePattern.isWildcard) {
89
- nameFilter = new RegExp('^' + escapeRegExp(tablePattern.tablePrefix));
90
- }
91
- else {
92
- nameFilter = tablePattern.name;
93
- }
94
- let result = [];
95
- // Check if the collection exists
96
- const collections = await this.client
97
- .db(schema)
98
- .listCollections({
99
- name: nameFilter
100
- }, { nameOnly: false })
101
- .toArray();
102
- if (!tablePattern.isWildcard && collections.length == 0) {
103
- this.logger.warn(`Collection ${schema}.${tablePattern.name} not found`);
104
- }
105
- for (let collection of collections) {
106
- const table = await this.handleRelation(batch, getMongoRelation({ db: schema, coll: collection.name }),
107
- // This is done as part of the initial setup - snapshot is handled elsewhere
108
- { snapshot: false, collectionInfo: collection });
109
- result.push(table);
110
- }
111
- return result;
112
- }
113
- async initSlot() {
114
- const status = await this.storage.getStatus();
115
- if (status.snapshot_done && status.checkpoint_lsn) {
116
- this.logger.info(`Initial replication already done`);
117
- return { needsInitialSync: false, snapshotLsn: null };
118
- }
119
- return { needsInitialSync: true, snapshotLsn: status.snapshot_lsn };
120
- }
121
- async estimatedCount(table) {
122
- const count = await this.estimatedCountNumber(table);
123
- return `~${count}`;
124
- }
125
- async estimatedCountNumber(table) {
126
- const db = this.client.db(table.schema);
127
- return await db.collection(table.name).estimatedDocumentCount();
128
- }
129
- /**
130
- * This gets a LSN before starting a snapshot, which we can resume streaming from after the snapshot.
131
- *
132
- * This LSN can survive initial replication restarts.
133
- */
134
- async getSnapshotLsn() {
135
- const hello = await this.defaultDb.command({ hello: 1 });
136
- // Basic sanity check
137
- if (hello.msg == 'isdbgrid') {
138
- throw new ServiceError(ErrorCode.PSYNC_S1341, 'Sharded MongoDB Clusters are not supported yet (including MongoDB Serverless instances).');
139
- }
140
- else if (hello.setName == null) {
141
- throw new ServiceError(ErrorCode.PSYNC_S1342, 'Standalone MongoDB instances are not supported - use a replicaset.');
142
- }
143
- // Open a change stream just to get a resume token for later use.
144
- // We could use clusterTime from the hello command, but that won't tell us if the
145
- // snapshot isn't valid anymore.
146
- // If we just use the first resumeToken from the stream, we get two potential issues:
147
- // 1. The resumeToken may just be a wrapped clusterTime, which does not detect changes
148
- // in source db or other stream issues.
149
- // 2. The first actual change we get may have the same clusterTime, causing us to incorrect
150
- // skip that event.
151
- // Instead, we create a new checkpoint document, and wait until we get that document back in the stream.
152
- // To avoid potential race conditions with the checkpoint creation, we create a new checkpoint document
153
- // periodically until the timeout is reached.
154
- const LSN_TIMEOUT_SECONDS = 60;
155
- const LSN_CREATE_INTERVAL_SECONDS = 1;
156
- // Create a checkpoint, and open a change stream using startAtOperationTime with the checkpoint's operationTime.
157
- const firstCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
158
- const startTime = performance.now();
159
- let lastCheckpointCreated = performance.now();
160
- let eventsSeen = 0;
161
- let batchesSeen = 0;
162
- const filters = this.getSourceNamespaceFilters();
163
- const iter = this.rawChangeStreamBatches({
164
- lsn: firstCheckpointLsn,
165
- maxAwaitTimeMS: 0,
166
- signal: this.abort_signal,
167
- filters
168
- });
169
- for await (let { events } of iter) {
170
- if (performance.now() - startTime >= LSN_TIMEOUT_SECONDS * 1000) {
171
- break;
172
- }
173
- if (performance.now() - lastCheckpointCreated >= LSN_CREATE_INTERVAL_SECONDS * 1000) {
174
- await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
175
- lastCheckpointCreated = performance.now();
176
- }
177
- batchesSeen += 1;
178
- for (let rawChangeDocument of events) {
179
- const changeDocument = parseChangeDocument(rawChangeDocument);
180
- const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
181
- if (ns?.coll == CHECKPOINTS_COLLECTION && 'documentKey' in changeDocument) {
182
- const checkpointId = changeDocument.documentKey._id;
183
- if (!this.checkpointStreamId.equals(checkpointId)) {
184
- continue;
185
- }
186
- const { comparable: lsn } = new MongoLSN({
187
- timestamp: changeDocument.clusterTime,
188
- resume_token: changeDocument._id
189
- });
190
- return lsn;
191
- }
192
- eventsSeen += 1;
193
- }
194
- }
195
- // Could happen if there is a very large replication lag?
196
- throw new ServiceError(ErrorCode.PSYNC_S1301, `Timeout after while waiting for checkpoint document for ${LSN_TIMEOUT_SECONDS}s. Streamed events = ${eventsSeen}, batches = ${batchesSeen}`);
197
- }
198
- /**
199
- * Given a snapshot LSN, validate that we can read from it, by opening a change stream.
200
- */
201
- async validateSnapshotLsn(lsn) {
202
- const filters = this.getSourceNamespaceFilters();
203
- const stream = this.rawChangeStreamBatches({
204
- lsn: lsn,
205
- // maxAwaitTimeMS should never actually be used here
206
- maxAwaitTimeMS: 0,
207
- filters
208
- });
209
- for await (let _batch of stream) {
210
- // We got a response from the aggregate command, so consider the LSN valid.
211
- // Close the stream immediately.
212
- break;
213
- }
214
- }
215
- async initialReplication(snapshotLsn) {
216
- const sourceTables = this.sync_rules.getSourceTables();
217
- await this.client.connect();
218
- const tracer = new PerformanceTracer('MongoDB initial replication');
219
- const flushResult = await this.storage.startBatch({
220
- logger: this.logger,
221
- zeroLSN: MongoLSN.ZERO.comparable,
222
- defaultSchema: this.defaultDb.databaseName,
223
- storeCurrentData: false,
224
- skipExistingRows: true,
225
- tracer
226
- }, async (batch) => {
227
- if (snapshotLsn == null) {
228
- // First replication attempt - get a snapshot and store the timestamp
229
- snapshotLsn = await this.getSnapshotLsn();
230
- await batch.setResumeLsn(snapshotLsn);
231
- this.logger.info(`Marking snapshot at ${snapshotLsn}`);
232
- }
233
- else {
234
- this.logger.info(`Resuming snapshot at ${snapshotLsn}`);
235
- // Check that the snapshot is still valid.
236
- await this.validateSnapshotLsn(snapshotLsn);
237
- }
238
- // Start by resolving all tables.
239
- // This checks postImage configuration, and that should fail as
240
- // early as possible.
241
- let allSourceTables = [];
242
- for (let tablePattern of sourceTables) {
243
- const tables = await this.resolveQualifiedTableNames(batch, tablePattern);
244
- allSourceTables.push(...tables);
245
- }
246
- let tablesWithStatus = [];
247
- for (let table of allSourceTables) {
248
- if (table.snapshotComplete) {
249
- this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`);
250
- continue;
251
- }
252
- let count = await this.estimatedCountNumber(table);
253
- const updated = await batch.updateTableProgress(table, {
254
- totalEstimatedCount: count
255
- });
256
- tablesWithStatus.push(updated);
257
- this.relationCache.update(updated);
258
- this.logger.info(`To replicate: ${table.qualifiedName}: ${updated.snapshotStatus?.replicatedCount}/~${updated.snapshotStatus?.totalEstimatedCount}`);
259
- }
260
- for (let table of tablesWithStatus) {
261
- await this.snapshotTable(batch, table);
262
- await batch.markTableSnapshotDone([table]);
263
- this.touch();
264
- }
265
- // The checkpoint here is a marker - we need to replicate up to at least this
266
- // point before the data can be considered consistent.
267
- // We could do this for each individual table, but may as well just do it once for the entire snapshot.
268
- const checkpoint = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);
269
- await batch.markAllSnapshotDone(checkpoint);
270
- // This will not create a consistent checkpoint yet, but will persist the op.
271
- // Actual checkpoint will be created when streaming replication caught up.
272
- await batch.commit(snapshotLsn);
273
- this.logger.info(`Snapshot done. Need to replicate from ${snapshotLsn} to ${checkpoint} to be consistent`);
274
- });
275
- return { lastOpId: flushResult?.flushed_op };
276
- }
277
- async setupCheckpointsCollection() {
278
- const collection = await this.getCollectionInfo(this.defaultDb.databaseName, CHECKPOINTS_COLLECTION);
279
- if (collection == null) {
280
- await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, {
281
- changeStreamPreAndPostImages: { enabled: true }
282
- });
283
- }
284
- else if (this.usePostImages && collection.options?.changeStreamPreAndPostImages?.enabled != true) {
285
- // Drop + create requires less permissions than collMod,
286
- // and we don't care about the data in this collection.
287
- await this.defaultDb.dropCollection(CHECKPOINTS_COLLECTION);
288
- await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, {
289
- changeStreamPreAndPostImages: { enabled: true }
290
- });
291
- }
292
- else {
293
- // Clear the collection on startup, to keep it clean
294
- // We never query this collection directly, and don't want to keep the data around.
295
- // We only use this to get data into the oplog/changestream.
296
- await this.defaultDb.collection(CHECKPOINTS_COLLECTION).deleteMany({});
297
- }
298
- }
299
92
  getSourceNamespaceFilters() {
300
93
  const sourceTables = this.sync_rules.getSourceTables();
301
94
  let $inFilters = [
@@ -343,73 +136,10 @@ export class ChangeStream {
343
136
  }
344
137
  return { $match: nsFilter, multipleDatabases };
345
138
  }
346
- async snapshotTable(batch, table) {
347
- const rowsReplicatedMetric = this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED);
348
- const bytesReplicatedMetric = this.metrics.getCounter(ReplicationMetric.DATA_REPLICATED_BYTES);
349
- const chunksReplicatedMetric = this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED);
350
- const totalEstimatedCount = await this.estimatedCountNumber(table);
351
- let at = table.snapshotStatus?.replicatedCount ?? 0;
352
- const db = this.client.db(table.schema);
353
- const collection = db.collection(table.name);
354
- await using query = new ChunkedSnapshotQuery({
355
- collection,
356
- key: table.snapshotStatus?.lastKey,
357
- batchSize: this.snapshotChunkLength
358
- });
359
- if (query.lastKey != null) {
360
- this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming at _id > ${query.lastKey}`);
361
- }
362
- else {
363
- this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
364
- }
365
- let lastBatch = performance.now();
366
- let nextChunkPromise = query.nextChunk();
367
- while (true) {
368
- const { docs: docBatch, lastKey, bytes: chunkBytes } = await nextChunkPromise;
369
- if (docBatch.length == 0) {
370
- // No more data - stop iterating
371
- break;
372
- }
373
- bytesReplicatedMetric.add(chunkBytes);
374
- chunksReplicatedMetric.add(1);
375
- if (this.abort_signal.aborted) {
376
- throw new ReplicationAbortedError(`Aborted initial replication`, this.abort_signal.reason);
377
- }
378
- // Pre-fetch next batch, so that we can read and write concurrently
379
- nextChunkPromise = query.nextChunk();
380
- for (let buffer of docBatch) {
381
- const { row: record, replicaId: replicaId } = this.rawToSqliteRow(buffer);
382
- // This auto-flushes when the batch reaches its size limit
383
- await batch.save({
384
- tag: SaveOperationTag.INSERT,
385
- sourceTable: table,
386
- before: undefined,
387
- beforeReplicaId: undefined,
388
- after: record,
389
- afterReplicaId: replicaId
390
- });
391
- }
392
- // Important: flush before marking progress
393
- await batch.flush();
394
- at += docBatch.length;
395
- rowsReplicatedMetric.add(docBatch.length);
396
- table = await batch.updateTableProgress(table, {
397
- lastKey,
398
- replicatedCount: at,
399
- totalEstimatedCount: totalEstimatedCount
400
- });
401
- this.relationCache.update(table);
402
- const duration = performance.now() - lastBatch;
403
- lastBatch = performance.now();
404
- this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} in ${duration.toFixed(0)}ms`);
405
- this.touch();
406
- }
407
- // In case the loop was interrupted, make sure we await the last promise.
408
- await nextChunkPromise;
409
- }
410
- async getRelation(batch, descriptor, options) {
411
- const existing = this.relationCache.get(descriptor);
139
+ async getRelations(batch, descriptor, options) {
140
+ const existing = this.relationCache.getAll(descriptor);
412
141
  if (existing != null) {
142
+ // We do this even when it's an empty result: Empty means nothing to sync, and we don't need to re-resolve.
413
143
  return existing;
414
144
  }
415
145
  // Note: collection may have been dropped at this point, so we handle
@@ -452,14 +182,11 @@ export class ChangeStream {
452
182
  // Ignore the postImages check in this case.
453
183
  }
454
184
  const snapshot = options.snapshot;
455
- const result = await this.storage.resolveTable({
456
- group_id: this.group_id,
185
+ const result = await batch.resolveTables({
457
186
  connection_id: this.connection_id,
458
- connection_tag: this.connections.connectionTag,
459
- entity_descriptor: descriptor,
460
- sync_rules: this.sync_rules
187
+ source: descriptor
461
188
  });
462
- this.relationCache.update(result.table);
189
+ this.relationCache.updateAll(descriptor, result.tables);
463
190
  // Drop conflicting collections.
464
191
  // This is generally not expected for MongoDB source dbs, so we log an error.
465
192
  if (result.dropTables.length > 0) {
@@ -470,17 +197,12 @@ export class ChangeStream {
470
197
  // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere)
471
198
  // 2. Snapshot is not already done, AND:
472
199
  // 3. The table is used in sync config.
473
- const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny;
474
- if (shouldSnapshot) {
200
+ const snapshotCandidates = result.tables.filter((table) => snapshot && !table.snapshotComplete && table.syncAny);
201
+ if (snapshotCandidates.length > 0) {
475
202
  this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`);
476
- // Truncate this table, in case a previous snapshot was interrupted.
477
- await batch.truncate([result.table]);
478
- await this.snapshotTable(batch, result.table);
479
- const no_checkpoint_before_lsn = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);
480
- const [table] = await batch.markTableSnapshotDone([result.table], no_checkpoint_before_lsn);
481
- return table;
482
- }
483
- return result.table;
203
+ await this.snapshotter.snapshotTables(batch, snapshotCandidates);
204
+ }
205
+ return result.tables;
484
206
  }
485
207
  async writeChange(batch, table, change) {
486
208
  if (!table.syncAny) {
@@ -535,34 +257,60 @@ export class ChangeStream {
535
257
  }
536
258
  }
537
259
  async replicate() {
260
+ let streamPromise = null;
261
+ let loopPromise = null;
538
262
  try {
539
263
  // If anything errors here, the entire replication process is halted, and
540
264
  // all connections automatically closed, including this one.
541
- await this.initReplication();
542
- await this.streamChanges();
265
+ this.initPromise = this.initReplication();
266
+ await this.initPromise;
267
+ loopPromise = this.snapshotter
268
+ .replicationLoop()
269
+ .then(() => {
270
+ throw new ReplicationAssertionError(`Replication snapshotter exited unexpectedly`);
271
+ })
272
+ .catch((e) => {
273
+ this.abortController.abort(e);
274
+ throw e;
275
+ });
276
+ if (!this.snapshotter.supportsConcurrentSnapshots) {
277
+ await Promise.race([this.snapshotter.waitForInitialSnapshot(), loopPromise]);
278
+ }
279
+ streamPromise = this.streamChanges()
280
+ .then(() => {
281
+ throw new ReplicationAssertionError(`Replication stream exited unexpectedly`);
282
+ })
283
+ .catch((e) => {
284
+ this.abortController.abort(e);
285
+ throw e;
286
+ });
287
+ const results = await Promise.allSettled([loopPromise, streamPromise]);
288
+ throw replicationLoopError(results);
543
289
  }
544
290
  catch (e) {
545
291
  await this.storage.reportError(e);
546
292
  throw e;
547
293
  }
294
+ finally {
295
+ this.abortController.abort();
296
+ }
297
+ }
298
+ async waitForInitialSnapshot() {
299
+ if (this.initPromise == null) {
300
+ throw new ReplicationAssertionError('replicate() must be called before waitForInitialSnapshot()');
301
+ }
302
+ await this.initPromise;
303
+ await this.snapshotter.waitForInitialSnapshot();
548
304
  }
549
305
  async initReplication() {
550
- const result = await this.initSlot();
551
- await this.setupCheckpointsCollection();
306
+ const result = await this.snapshotter.checkSlot();
307
+ await this.snapshotter.setupCheckpointsCollection();
552
308
  if (result.needsInitialSync) {
553
309
  if (result.snapshotLsn == null) {
554
310
  // Snapshot LSN is not present, so we need to start replication from scratch.
555
- await this.storage.clear({ signal: this.abort_signal });
556
- }
557
- const { lastOpId } = await this.initialReplication(result.snapshotLsn);
558
- if (lastOpId != null) {
559
- // Populate the cache _after_ initial replication, but _before_ we switch to this replication stream.
560
- await this.storage.populatePersistentChecksumCache({
561
- signal: this.abort_signal,
562
- // No checkpoint yet, but we do have the opId.
563
- maxOpId: lastOpId
564
- });
311
+ await this.storage.clear({ signal: this.abortSignal });
565
312
  }
313
+ await this.snapshotter.queueSnapshotTables(result.snapshotLsn);
566
314
  }
567
315
  }
568
316
  async streamChanges() {
@@ -649,6 +397,7 @@ export class ChangeStream {
649
397
  defaultSchema: this.defaultDb.databaseName,
650
398
  // We get a complete postimage for every change, so we don't need to store the current data.
651
399
  storeCurrentData: false,
400
+ hooks: this.storageHooks,
652
401
  tracer
653
402
  }, async (batch) => {
654
403
  const { resumeFromLsn } = batch;
@@ -667,7 +416,7 @@ export class ChangeStream {
667
416
  const batchStream = this.rawChangeStreamBatches({
668
417
  lsn: resumeFromLsn,
669
418
  filters,
670
- signal: this.abort_signal,
419
+ signal: this.abortSignal,
671
420
  tracer
672
421
  });
673
422
  // Always start with a checkpoint.
@@ -683,7 +432,7 @@ export class ChangeStream {
683
432
  using batchSpan = tracer.span('processing');
684
433
  bytesReplicatedMetric.add(eventBatch.byteSize);
685
434
  chunksReplicatedMetric.add(1);
686
- if (this.abort_signal.aborted) {
435
+ if (this.abortSignal.aborted) {
687
436
  break;
688
437
  }
689
438
  this.touch();
@@ -712,7 +461,7 @@ export class ChangeStream {
712
461
  for (let eventIndex = 0; eventIndex < events.length; eventIndex++) {
713
462
  const rawChangeDocument = events[eventIndex];
714
463
  const originalChangeDocument = parseChangeDocument(rawChangeDocument);
715
- if (this.abort_signal.aborted) {
464
+ if (this.abortSignal.aborted) {
716
465
  break;
717
466
  }
718
467
  if (startAfter != null && originalChangeDocument.clusterTime?.lte(startAfter)) {
@@ -799,12 +548,19 @@ export class ChangeStream {
799
548
  // change stream events, collapse standalone checkpoints into the normal batch
800
549
  // checkpoint flow to avoid commit churn under sustained load.
801
550
  const hasBufferedChanges = eventIndex < events.length - 1;
802
- if (waitForCheckpointLsn != null || hasBufferedChanges) {
803
- if (waitForCheckpointLsn == null) {
804
- waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
805
- }
551
+ if (hasBufferedChanges && waitForCheckpointLsn == null) {
552
+ // Buffered changes - create a new batch checkpoint to rate limit commits
553
+ using _ = tracer.span('source_checkpoint');
554
+ waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
555
+ continue;
556
+ }
557
+ else if (waitForCheckpointLsn != null) {
558
+ // Skip this checkpoint - wait for the batch checkpoint.
806
559
  continue;
807
560
  }
561
+ else {
562
+ // No buffered changes, and no batch checkpoint pending - commit immediately.
563
+ }
808
564
  }
809
565
  else if (!this.checkpointStreamId.equals(checkpointId)) {
810
566
  continue;
@@ -836,17 +592,19 @@ export class ChangeStream {
836
592
  changeDocument.operationType == 'replace' ||
837
593
  changeDocument.operationType == 'delete') {
838
594
  if (waitForCheckpointLsn == null) {
595
+ using _ = tracer.span('source_checkpoint');
839
596
  waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
840
597
  }
841
- const rel = getMongoRelation(changeDocument.ns);
842
- const table = await this.getRelation(batch, rel, {
598
+ const rel = getMongoRelation(changeDocument.ns, this.connections.connectionTag);
599
+ const tables = await this.getRelations(batch, rel, {
843
600
  // In most cases, we should not need to snapshot this. But if this is the first time we see the collection
844
601
  // for whatever reason, then we do need to snapshot it.
845
602
  // This may result in some duplicate operations when a collection is created for the first time after
846
603
  // sync config was deployed.
847
604
  snapshot: true
848
605
  });
849
- if (table.syncAny) {
606
+ const tablesToReplicate = tables.filter((table) => table.syncAny);
607
+ if (tablesToReplicate.length > 0) {
850
608
  this.replicationLag.trackUncommittedChange(changeDocument.clusterTime == null ? null : timestampToDate(changeDocument.clusterTime));
851
609
  const transactionKeyValue = transactionKey(changeDocument);
852
610
  if (transactionKeyValue == null || lastTxnKey != transactionKeyValue) {
@@ -856,31 +614,35 @@ export class ChangeStream {
856
614
  lastTxnKey = transactionKeyValue;
857
615
  transactionsReplicatedMetric.add(1);
858
616
  }
859
- await this.writeChange(batch, table, changeDocument);
617
+ for (const table of tablesToReplicate) {
618
+ await this.writeChange(batch, table, changeDocument);
619
+ }
860
620
  }
861
621
  }
862
622
  else if (changeDocument.operationType == 'drop') {
863
- const rel = getMongoRelation(changeDocument.ns);
864
- const table = await this.getRelation(batch, rel, {
623
+ const rel = getMongoRelation(changeDocument.ns, this.connections.connectionTag);
624
+ const tables = await this.getRelations(batch, rel, {
865
625
  // We're "dropping" this collection, so never snapshot it.
866
626
  snapshot: false
867
627
  });
868
- if (table.syncAny) {
869
- await batch.drop([table]);
870
- this.relationCache.delete(table);
628
+ const tablesToDrop = tables.filter((table) => table.syncAny);
629
+ if (tablesToDrop.length > 0) {
630
+ await batch.drop(tablesToDrop);
871
631
  }
632
+ this.relationCache.delete(rel);
872
633
  }
873
634
  else if (changeDocument.operationType == 'rename') {
874
- const relFrom = getMongoRelation(changeDocument.ns);
875
- const relTo = getMongoRelation(changeDocument.to);
876
- const tableFrom = await this.getRelation(batch, relFrom, {
635
+ const relFrom = getMongoRelation(changeDocument.ns, this.connections.connectionTag);
636
+ const relTo = getMongoRelation(changeDocument.to, this.connections.connectionTag);
637
+ const tablesFrom = await this.getRelations(batch, relFrom, {
877
638
  // We're "dropping" this collection, so never snapshot it.
878
639
  snapshot: false
879
640
  });
880
- if (tableFrom.syncAny) {
881
- await batch.drop([tableFrom]);
882
- this.relationCache.delete(relFrom);
641
+ const tablesToDrop = tablesFrom.filter((table) => table.syncAny);
642
+ if (tablesToDrop.length > 0) {
643
+ await batch.drop(tablesToDrop);
883
644
  }
645
+ this.relationCache.delete(relFrom);
884
646
  // Here we do need to snapshot the new table
885
647
  const collection = await this.getCollectionInfo(relTo.schema, relTo.name);
886
648
  await this.handleRelation(batch, relTo, {
@@ -901,17 +663,18 @@ export class ChangeStream {
901
663
  await batch.setResumeLsn(lsn);
902
664
  }
903
665
  batchSpan.end();
904
- const durations = outerSpan.end();
905
- const duration = batchSpan.endAt - batchSpan.startAt;
666
+ const durationsMicroseconds = outerSpan.end();
667
+ const duration = batchSpan.durationMillis;
906
668
  this.logger.info(`Processed batch of ${events.length} changes / ${eventBatch.byteSize} bytes in ${duration}ms`, {
907
669
  count: events.length,
908
670
  bytes: eventBatch.byteSize,
909
671
  duration,
910
- t: durations
672
+ t: durationsMicroseconds
911
673
  });
912
674
  outerSpan = tracer.span('batch');
913
675
  }
914
676
  });
677
+ throw new ReplicationAbortedError(`Replication stream aborted`, this.abortSignal.reason);
915
678
  }
916
679
  getReplicationLagMillis() {
917
680
  return this.replicationLag.getLagMillis();
@@ -936,4 +699,26 @@ function transactionKey(doc) {
936
699
  }
937
700
  return `${doc.lsid.id.toString('hex')}:${doc.txnNumber}`;
938
701
  }
702
+ /**
703
+ * Prioritize errors that are _not_ ReplicationAbortedError. Any error on either loopPromise or
704
+ * streamPromise aborts the other one, which then results in a ReplicationAbortedError, hiding the
705
+ * original cause.
706
+ */
707
+ function replicationLoopError(results) {
708
+ // 1. Prioritize not ReplicationAbortedError.
709
+ for (const result of results) {
710
+ if (result.status == 'rejected' && !(result.reason instanceof ReplicationAbortedError)) {
711
+ return result.reason;
712
+ }
713
+ }
714
+ // 2. Fallback to ReplicationAbortedError.
715
+ for (const result of results) {
716
+ if (result.status == 'rejected') {
717
+ // At this point only ReplicationAbortedError remains
718
+ return result.reason;
719
+ }
720
+ }
721
+ // 3. Should never happen, but we cover this case.
722
+ return new ReplicationAssertionError(`Replication loop exited unexpectedly`);
723
+ }
939
724
  //# sourceMappingURL=ChangeStream.js.map