@powersync/service-module-mongodb 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,7 +18,7 @@ import {
18
18
  SourceTable,
19
19
  storage
20
20
  } from '@powersync/service-core';
21
- import { HydratedSyncRules, TablePattern } from '@powersync/service-sync-rules';
21
+ import { HydratedSyncConfig } from '@powersync/service-sync-rules';
22
22
  import { ReplicationMetric } from '@powersync/service-types';
23
23
  import { performance } from 'node:perf_hooks';
24
24
  import { MongoLSN } from '../common/MongoLSN.js';
@@ -26,7 +26,7 @@ import { PostImagesOption } from '../types/types.js';
26
26
  import { escapeRegExp } from '../utils.js';
27
27
  import { MongoManager } from './MongoManager.js';
28
28
  import { createCheckpoint, getCacheIdentifier, getMongoRelation, STANDALONE_CHECKPOINT_ID } from './MongoRelation.js';
29
- import { ChunkedSnapshotQuery } from './MongoSnapshotQuery.js';
29
+ import { MongoSnapshotter, MongoSnapshotterHooks } from './MongoSnapshotter.js';
30
30
  import {
31
31
  ChangeStreamBatch,
32
32
  parseChangeDocument,
@@ -53,12 +53,10 @@ export interface ChangeStreamOptions {
53
53
  */
54
54
  snapshotChunkLength?: number;
55
55
 
56
- logger?: Logger;
57
- }
56
+ storageHooks?: storage.StorageHooks;
57
+ snapshotHooks?: MongoSnapshotterHooks;
58
58
 
59
- interface InitResult {
60
- needsInitialSync: boolean;
61
- snapshotLsn: string | null;
59
+ logger?: Logger;
62
60
  }
63
61
 
64
62
  /**
@@ -76,7 +74,7 @@ export class ChangeStreamInvalidatedError extends DatabaseConnectionError {
76
74
  }
77
75
 
78
76
  export class ChangeStream {
79
- sync_rules: HydratedSyncRules;
77
+ sync_rules: HydratedSyncConfig;
80
78
  group_id: number;
81
79
 
82
80
  connection_id = 1;
@@ -90,8 +88,15 @@ export class ChangeStream {
90
88
 
91
89
  private readonly maxAwaitTimeMS: number;
92
90
 
93
- private abort_signal: AbortSignal;
91
+ private abortController = new AbortController();
92
+ private abortSignal: AbortSignal = this.abortController.signal;
94
93
 
94
+ private initPromise: Promise<void> | null = null;
95
+ private snapshotter: MongoSnapshotter;
96
+
97
+ /**
98
+ * We use the relationCache _only_ for caching static SourceTable info, not for snapshot status.
99
+ */
95
100
  private relationCache = new RelationCache(getCacheIdentifier);
96
101
 
97
102
  private replicationLag = new ReplicationLagTracker();
@@ -104,6 +109,8 @@ export class ChangeStream {
104
109
 
105
110
  private changeStreamTimeout: number;
106
111
 
112
+ private storageHooks: storage.StorageHooks | undefined;
113
+
107
114
  private readonly sourceRowConverter: SourceRowConverter;
108
115
 
109
116
  constructor(options: ChangeStreamOptions) {
@@ -113,6 +120,7 @@ export class ChangeStream {
113
120
  this.connections = options.connections;
114
121
  this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 10_000;
115
122
  this.snapshotChunkLength = options.snapshotChunkLength ?? 6_000;
123
+ this.storageHooks = options.storageHooks;
116
124
  this.client = this.connections.client;
117
125
  this.defaultDb = this.connections.db;
118
126
  this.sync_rules = options.storage.getParsedSyncRules({
@@ -124,20 +132,28 @@ export class ChangeStream {
124
132
  // so we use 90% of the socket timeout value.
125
133
  this.changeStreamTimeout = Math.ceil(this.client.options.socketTimeoutMS * 0.9);
126
134
 
127
- this.abort_signal = options.abort_signal;
128
- this.abort_signal.addEventListener(
135
+ this.logger = options.logger ?? this.storage.logger;
136
+ this.snapshotter = new MongoSnapshotter({
137
+ ...options,
138
+ abortSignal: this.abortSignal,
139
+ logger: this.logger,
140
+ checkpointStreamId: this.checkpointStreamId
141
+ });
142
+
143
+ options.abort_signal.addEventListener(
129
144
  'abort',
130
145
  () => {
131
- // TODO: Fast abort?
146
+ this.abortController.abort(options.abort_signal.reason);
132
147
  },
133
148
  { once: true }
134
149
  );
135
-
136
- this.logger = options.logger ?? this.storage.logger;
150
+ if (options.abort_signal.aborted) {
151
+ this.abortController.abort(options.abort_signal.reason);
152
+ }
137
153
  }
138
154
 
139
155
  get stopped() {
140
- return this.abort_signal.aborted;
156
+ return this.abortSignal.aborted;
141
157
  }
142
158
 
143
159
  private get usePostImages() {
@@ -148,279 +164,6 @@ export class ChangeStream {
148
164
  return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE;
149
165
  }
150
166
 
151
- /**
152
- * This resolves a pattern, persists the related metadata, and returns
153
- * the resulting SourceTables.
154
- *
155
- * This implicitly checks the collection postImage configuration.
156
- */
157
- async resolveQualifiedTableNames(
158
- batch: storage.BucketStorageBatch,
159
- tablePattern: TablePattern
160
- ): Promise<storage.SourceTable[]> {
161
- const schema = tablePattern.schema;
162
- if (tablePattern.connectionTag != this.connections.connectionTag) {
163
- return [];
164
- }
165
-
166
- let nameFilter: RegExp | string;
167
- if (tablePattern.isWildcard) {
168
- nameFilter = new RegExp('^' + escapeRegExp(tablePattern.tablePrefix));
169
- } else {
170
- nameFilter = tablePattern.name;
171
- }
172
- let result: storage.SourceTable[] = [];
173
-
174
- // Check if the collection exists
175
- const collections = await this.client
176
- .db(schema)
177
- .listCollections(
178
- {
179
- name: nameFilter
180
- },
181
- { nameOnly: false }
182
- )
183
- .toArray();
184
-
185
- if (!tablePattern.isWildcard && collections.length == 0) {
186
- this.logger.warn(`Collection ${schema}.${tablePattern.name} not found`);
187
- }
188
-
189
- for (let collection of collections) {
190
- const table = await this.handleRelation(
191
- batch,
192
- getMongoRelation({ db: schema, coll: collection.name }),
193
- // This is done as part of the initial setup - snapshot is handled elsewhere
194
- { snapshot: false, collectionInfo: collection }
195
- );
196
-
197
- result.push(table);
198
- }
199
-
200
- return result;
201
- }
202
-
203
- async initSlot(): Promise<InitResult> {
204
- const status = await this.storage.getStatus();
205
- if (status.snapshot_done && status.checkpoint_lsn) {
206
- this.logger.info(`Initial replication already done`);
207
- return { needsInitialSync: false, snapshotLsn: null };
208
- }
209
-
210
- return { needsInitialSync: true, snapshotLsn: status.snapshot_lsn };
211
- }
212
-
213
- async estimatedCount(table: storage.SourceTable): Promise<string> {
214
- const count = await this.estimatedCountNumber(table);
215
- return `~${count}`;
216
- }
217
-
218
- async estimatedCountNumber(table: storage.SourceTable): Promise<number> {
219
- const db = this.client.db(table.schema);
220
- return await db.collection(table.name).estimatedDocumentCount();
221
- }
222
-
223
- /**
224
- * This gets a LSN before starting a snapshot, which we can resume streaming from after the snapshot.
225
- *
226
- * This LSN can survive initial replication restarts.
227
- */
228
- private async getSnapshotLsn(): Promise<string> {
229
- const hello = await this.defaultDb.command({ hello: 1 });
230
- // Basic sanity check
231
- if (hello.msg == 'isdbgrid') {
232
- throw new ServiceError(
233
- ErrorCode.PSYNC_S1341,
234
- 'Sharded MongoDB Clusters are not supported yet (including MongoDB Serverless instances).'
235
- );
236
- } else if (hello.setName == null) {
237
- throw new ServiceError(
238
- ErrorCode.PSYNC_S1342,
239
- 'Standalone MongoDB instances are not supported - use a replicaset.'
240
- );
241
- }
242
-
243
- // Open a change stream just to get a resume token for later use.
244
- // We could use clusterTime from the hello command, but that won't tell us if the
245
- // snapshot isn't valid anymore.
246
- // If we just use the first resumeToken from the stream, we get two potential issues:
247
- // 1. The resumeToken may just be a wrapped clusterTime, which does not detect changes
248
- // in source db or other stream issues.
249
- // 2. The first actual change we get may have the same clusterTime, causing us to incorrect
250
- // skip that event.
251
- // Instead, we create a new checkpoint document, and wait until we get that document back in the stream.
252
- // To avoid potential race conditions with the checkpoint creation, we create a new checkpoint document
253
- // periodically until the timeout is reached.
254
-
255
- const LSN_TIMEOUT_SECONDS = 60;
256
- const LSN_CREATE_INTERVAL_SECONDS = 1;
257
-
258
- // Create a checkpoint, and open a change stream using startAtOperationTime with the checkpoint's operationTime.
259
- const firstCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
260
-
261
- const startTime = performance.now();
262
- let lastCheckpointCreated = performance.now();
263
- let eventsSeen = 0;
264
- let batchesSeen = 0;
265
-
266
- const filters = this.getSourceNamespaceFilters();
267
- const iter = this.rawChangeStreamBatches({
268
- lsn: firstCheckpointLsn,
269
- maxAwaitTimeMS: 0,
270
- signal: this.abort_signal,
271
- filters
272
- });
273
- for await (let { events } of iter) {
274
- if (performance.now() - startTime >= LSN_TIMEOUT_SECONDS * 1000) {
275
- break;
276
- }
277
- if (performance.now() - lastCheckpointCreated >= LSN_CREATE_INTERVAL_SECONDS * 1000) {
278
- await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
279
- lastCheckpointCreated = performance.now();
280
- }
281
- batchesSeen += 1;
282
-
283
- for (let rawChangeDocument of events) {
284
- const changeDocument = parseChangeDocument(rawChangeDocument);
285
- const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
286
-
287
- if (ns?.coll == CHECKPOINTS_COLLECTION && 'documentKey' in changeDocument) {
288
- const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId;
289
- if (!this.checkpointStreamId.equals(checkpointId)) {
290
- continue;
291
- }
292
- const { comparable: lsn } = new MongoLSN({
293
- timestamp: changeDocument.clusterTime!,
294
- resume_token: changeDocument._id
295
- });
296
- return lsn;
297
- }
298
-
299
- eventsSeen += 1;
300
- }
301
- }
302
-
303
- // Could happen if there is a very large replication lag?
304
- throw new ServiceError(
305
- ErrorCode.PSYNC_S1301,
306
- `Timeout after while waiting for checkpoint document for ${LSN_TIMEOUT_SECONDS}s. Streamed events = ${eventsSeen}, batches = ${batchesSeen}`
307
- );
308
- }
309
-
310
- /**
311
- * Given a snapshot LSN, validate that we can read from it, by opening a change stream.
312
- */
313
- private async validateSnapshotLsn(lsn: string) {
314
- const filters = this.getSourceNamespaceFilters();
315
- const stream = this.rawChangeStreamBatches({
316
- lsn: lsn,
317
- // maxAwaitTimeMS should never actually be used here
318
- maxAwaitTimeMS: 0,
319
- filters
320
- });
321
- for await (let _batch of stream) {
322
- // We got a response from the aggregate command, so consider the LSN valid.
323
- // Close the stream immediately.
324
- break;
325
- }
326
- }
327
-
328
- async initialReplication(snapshotLsn: string | null) {
329
- const sourceTables = this.sync_rules.getSourceTables();
330
- await this.client.connect();
331
- const tracer = new PerformanceTracer('MongoDB initial replication');
332
-
333
- const flushResult = await this.storage.startBatch(
334
- {
335
- logger: this.logger,
336
- zeroLSN: MongoLSN.ZERO.comparable,
337
- defaultSchema: this.defaultDb.databaseName,
338
- storeCurrentData: false,
339
- skipExistingRows: true,
340
- tracer
341
- },
342
- async (batch) => {
343
- if (snapshotLsn == null) {
344
- // First replication attempt - get a snapshot and store the timestamp
345
- snapshotLsn = await this.getSnapshotLsn();
346
- await batch.setResumeLsn(snapshotLsn);
347
- this.logger.info(`Marking snapshot at ${snapshotLsn}`);
348
- } else {
349
- this.logger.info(`Resuming snapshot at ${snapshotLsn}`);
350
- // Check that the snapshot is still valid.
351
- await this.validateSnapshotLsn(snapshotLsn);
352
- }
353
-
354
- // Start by resolving all tables.
355
- // This checks postImage configuration, and that should fail as
356
- // early as possible.
357
- let allSourceTables: SourceTable[] = [];
358
- for (let tablePattern of sourceTables) {
359
- const tables = await this.resolveQualifiedTableNames(batch, tablePattern);
360
- allSourceTables.push(...tables);
361
- }
362
-
363
- let tablesWithStatus: SourceTable[] = [];
364
- for (let table of allSourceTables) {
365
- if (table.snapshotComplete) {
366
- this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`);
367
- continue;
368
- }
369
- let count = await this.estimatedCountNumber(table);
370
- const updated = await batch.updateTableProgress(table, {
371
- totalEstimatedCount: count
372
- });
373
- tablesWithStatus.push(updated);
374
- this.relationCache.update(updated);
375
- this.logger.info(
376
- `To replicate: ${table.qualifiedName}: ${updated.snapshotStatus?.replicatedCount}/~${updated.snapshotStatus?.totalEstimatedCount}`
377
- );
378
- }
379
-
380
- for (let table of tablesWithStatus) {
381
- await this.snapshotTable(batch, table);
382
- await batch.markTableSnapshotDone([table]);
383
-
384
- this.touch();
385
- }
386
-
387
- // The checkpoint here is a marker - we need to replicate up to at least this
388
- // point before the data can be considered consistent.
389
- // We could do this for each individual table, but may as well just do it once for the entire snapshot.
390
- const checkpoint = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);
391
- await batch.markAllSnapshotDone(checkpoint);
392
-
393
- // This will not create a consistent checkpoint yet, but will persist the op.
394
- // Actual checkpoint will be created when streaming replication caught up.
395
- await batch.commit(snapshotLsn);
396
-
397
- this.logger.info(`Snapshot done. Need to replicate from ${snapshotLsn} to ${checkpoint} to be consistent`);
398
- }
399
- );
400
- return { lastOpId: flushResult?.flushed_op };
401
- }
402
-
403
- private async setupCheckpointsCollection() {
404
- const collection = await this.getCollectionInfo(this.defaultDb.databaseName, CHECKPOINTS_COLLECTION);
405
- if (collection == null) {
406
- await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, {
407
- changeStreamPreAndPostImages: { enabled: true }
408
- });
409
- } else if (this.usePostImages && collection.options?.changeStreamPreAndPostImages?.enabled != true) {
410
- // Drop + create requires less permissions than collMod,
411
- // and we don't care about the data in this collection.
412
- await this.defaultDb.dropCollection(CHECKPOINTS_COLLECTION);
413
- await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, {
414
- changeStreamPreAndPostImages: { enabled: true }
415
- });
416
- } else {
417
- // Clear the collection on startup, to keep it clean
418
- // We never query this collection directly, and don't want to keep the data around.
419
- // We only use this to get data into the oplog/changestream.
420
- await this.defaultDb.collection(CHECKPOINTS_COLLECTION).deleteMany({});
421
- }
422
- }
423
-
424
167
  private getSourceNamespaceFilters(): { $match: any; multipleDatabases: boolean } {
425
168
  const sourceTables = this.sync_rules.getSourceTables();
426
169
 
@@ -472,89 +215,14 @@ export class ChangeStream {
472
215
  return { $match: nsFilter, multipleDatabases };
473
216
  }
474
217
 
475
- private async snapshotTable(batch: storage.BucketStorageBatch, table: storage.SourceTable) {
476
- const rowsReplicatedMetric = this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED);
477
- const bytesReplicatedMetric = this.metrics.getCounter(ReplicationMetric.DATA_REPLICATED_BYTES);
478
- const chunksReplicatedMetric = this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED);
479
-
480
- const totalEstimatedCount = await this.estimatedCountNumber(table);
481
- let at = table.snapshotStatus?.replicatedCount ?? 0;
482
- const db = this.client.db(table.schema);
483
- const collection = db.collection(table.name);
484
- await using query = new ChunkedSnapshotQuery({
485
- collection,
486
- key: table.snapshotStatus?.lastKey,
487
- batchSize: this.snapshotChunkLength
488
- });
489
- if (query.lastKey != null) {
490
- this.logger.info(
491
- `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming at _id > ${query.lastKey}`
492
- );
493
- } else {
494
- this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
495
- }
496
-
497
- let lastBatch = performance.now();
498
- let nextChunkPromise = query.nextChunk();
499
- while (true) {
500
- const { docs: docBatch, lastKey, bytes: chunkBytes } = await nextChunkPromise;
501
- if (docBatch.length == 0) {
502
- // No more data - stop iterating
503
- break;
504
- }
505
- bytesReplicatedMetric.add(chunkBytes);
506
- chunksReplicatedMetric.add(1);
507
-
508
- if (this.abort_signal.aborted) {
509
- throw new ReplicationAbortedError(`Aborted initial replication`, this.abort_signal.reason);
510
- }
511
-
512
- // Pre-fetch next batch, so that we can read and write concurrently
513
- nextChunkPromise = query.nextChunk();
514
- for (let buffer of docBatch) {
515
- const { row: record, replicaId: replicaId } = this.rawToSqliteRow(buffer);
516
-
517
- // This auto-flushes when the batch reaches its size limit
518
- await batch.save({
519
- tag: SaveOperationTag.INSERT,
520
- sourceTable: table,
521
- before: undefined,
522
- beforeReplicaId: undefined,
523
- after: record,
524
- afterReplicaId: replicaId
525
- });
526
- }
527
-
528
- // Important: flush before marking progress
529
- await batch.flush();
530
- at += docBatch.length;
531
- rowsReplicatedMetric.add(docBatch.length);
532
-
533
- table = await batch.updateTableProgress(table, {
534
- lastKey,
535
- replicatedCount: at,
536
- totalEstimatedCount: totalEstimatedCount
537
- });
538
- this.relationCache.update(table);
539
-
540
- const duration = performance.now() - lastBatch;
541
- lastBatch = performance.now();
542
- this.logger.info(
543
- `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} in ${duration.toFixed(0)}ms`
544
- );
545
- this.touch();
546
- }
547
- // In case the loop was interrupted, make sure we await the last promise.
548
- await nextChunkPromise;
549
- }
550
-
551
- private async getRelation(
218
+ private async getRelations(
552
219
  batch: storage.BucketStorageBatch,
553
220
  descriptor: SourceEntityDescriptor,
554
221
  options: { snapshot: boolean }
555
- ): Promise<SourceTable> {
556
- const existing = this.relationCache.get(descriptor);
222
+ ): Promise<SourceTable[]> {
223
+ const existing = this.relationCache.getAll(descriptor);
557
224
  if (existing != null) {
225
+ // We do this even when it's an empty result: Empty means nothing to sync, and we don't need to re-resolve.
558
226
  return existing;
559
227
  }
560
228
 
@@ -612,14 +280,11 @@ export class ChangeStream {
612
280
  }
613
281
 
614
282
  const snapshot = options.snapshot;
615
- const result = await this.storage.resolveTable({
616
- group_id: this.group_id,
283
+ const result = await batch.resolveTables({
617
284
  connection_id: this.connection_id,
618
- connection_tag: this.connections.connectionTag,
619
- entity_descriptor: descriptor,
620
- sync_rules: this.sync_rules
285
+ source: descriptor
621
286
  });
622
- this.relationCache.update(result.table);
287
+ this.relationCache.updateAll(descriptor, result.tables);
623
288
 
624
289
  // Drop conflicting collections.
625
290
  // This is generally not expected for MongoDB source dbs, so we log an error.
@@ -634,20 +299,13 @@ export class ChangeStream {
634
299
  // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere)
635
300
  // 2. Snapshot is not already done, AND:
636
301
  // 3. The table is used in sync config.
637
- const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny;
638
- if (shouldSnapshot) {
302
+ const snapshotCandidates = result.tables.filter((table) => snapshot && !table.snapshotComplete && table.syncAny);
303
+ if (snapshotCandidates.length > 0) {
639
304
  this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`);
640
- // Truncate this table, in case a previous snapshot was interrupted.
641
- await batch.truncate([result.table]);
642
-
643
- await this.snapshotTable(batch, result.table);
644
- const no_checkpoint_before_lsn = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);
645
-
646
- const [table] = await batch.markTableSnapshotDone([result.table], no_checkpoint_before_lsn);
647
- return table;
305
+ await this.snapshotter.snapshotTables(batch, snapshotCandidates);
648
306
  }
649
307
 
650
- return result.table;
308
+ return result.tables;
651
309
  }
652
310
 
653
311
  async writeChange(
@@ -706,38 +364,65 @@ export class ChangeStream {
706
364
  }
707
365
 
708
366
  async replicate() {
367
+ let streamPromise: Promise<void> | null = null;
368
+ let loopPromise: Promise<void> | null = null;
709
369
  try {
710
370
  // If anything errors here, the entire replication process is halted, and
711
371
  // all connections automatically closed, including this one.
712
- await this.initReplication();
713
- await this.streamChanges();
372
+ this.initPromise = this.initReplication();
373
+ await this.initPromise;
374
+ loopPromise = this.snapshotter
375
+ .replicationLoop()
376
+ .then(() => {
377
+ throw new ReplicationAssertionError(`Replication snapshotter exited unexpectedly`);
378
+ })
379
+ .catch((e) => {
380
+ this.abortController.abort(e);
381
+ throw e;
382
+ });
383
+ if (!this.snapshotter.supportsConcurrentSnapshots) {
384
+ await Promise.race([this.snapshotter.waitForInitialSnapshot(), loopPromise]);
385
+ }
386
+ streamPromise = this.streamChanges()
387
+ .then(() => {
388
+ throw new ReplicationAssertionError(`Replication stream exited unexpectedly`);
389
+ })
390
+ .catch((e) => {
391
+ this.abortController.abort(e);
392
+ throw e;
393
+ });
394
+
395
+ const results = await Promise.allSettled([loopPromise, streamPromise]);
396
+ throw replicationLoopError(results);
714
397
  } catch (e) {
715
398
  await this.storage.reportError(e);
716
399
  throw e;
400
+ } finally {
401
+ this.abortController.abort();
402
+ }
403
+ }
404
+
405
+ public async waitForInitialSnapshot() {
406
+ if (this.initPromise == null) {
407
+ throw new ReplicationAssertionError('replicate() must be called before waitForInitialSnapshot()');
717
408
  }
409
+ await this.initPromise;
410
+ await this.snapshotter.waitForInitialSnapshot();
718
411
  }
719
412
 
720
- async initReplication() {
721
- const result = await this.initSlot();
722
- await this.setupCheckpointsCollection();
413
+ private async initReplication() {
414
+ const result = await this.snapshotter.checkSlot();
415
+ await this.snapshotter.setupCheckpointsCollection();
723
416
  if (result.needsInitialSync) {
724
417
  if (result.snapshotLsn == null) {
725
418
  // Snapshot LSN is not present, so we need to start replication from scratch.
726
- await this.storage.clear({ signal: this.abort_signal });
727
- }
728
- const { lastOpId } = await this.initialReplication(result.snapshotLsn);
729
- if (lastOpId != null) {
730
- // Populate the cache _after_ initial replication, but _before_ we switch to this replication stream.
731
- await this.storage.populatePersistentChecksumCache({
732
- signal: this.abort_signal,
733
- // No checkpoint yet, but we do have the opId.
734
- maxOpId: lastOpId
735
- });
419
+ await this.storage.clear({ signal: this.abortSignal });
736
420
  }
421
+ await this.snapshotter.queueSnapshotTables(result.snapshotLsn);
737
422
  }
738
423
  }
739
424
 
740
- async streamChanges() {
425
+ private async streamChanges() {
741
426
  try {
742
427
  await this.streamChangesInternal();
743
428
  } catch (e) {
@@ -830,7 +515,9 @@ export class ChangeStream {
830
515
  const bytesReplicatedMetric = this.metrics.getCounter(ReplicationMetric.DATA_REPLICATED_BYTES);
831
516
  const chunksReplicatedMetric = this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED);
832
517
 
833
- const tracer = new PerformanceTracer('MongoDB streaming replication');
518
+ const tracer = new PerformanceTracer<
519
+ 'storage' | 'evaluate' | 'batch' | 'source_checkpoint' | 'changestream' | 'processing'
520
+ >('MongoDB streaming replication');
834
521
  await this.storage.startBatch(
835
522
  {
836
523
  logger: this.logger,
@@ -838,6 +525,7 @@ export class ChangeStream {
838
525
  defaultSchema: this.defaultDb.databaseName,
839
526
  // We get a complete postimage for every change, so we don't need to store the current data.
840
527
  storeCurrentData: false,
528
+ hooks: this.storageHooks,
841
529
  tracer
842
530
  },
843
531
  async (batch) => {
@@ -860,7 +548,7 @@ export class ChangeStream {
860
548
  const batchStream = this.rawChangeStreamBatches({
861
549
  lsn: resumeFromLsn,
862
550
  filters,
863
- signal: this.abort_signal,
551
+ signal: this.abortSignal,
864
552
  tracer
865
553
  });
866
554
 
@@ -886,7 +574,7 @@ export class ChangeStream {
886
574
 
887
575
  bytesReplicatedMetric.add(eventBatch.byteSize);
888
576
  chunksReplicatedMetric.add(1);
889
- if (this.abort_signal.aborted) {
577
+ if (this.abortSignal.aborted) {
890
578
  break;
891
579
  }
892
580
  this.touch();
@@ -920,7 +608,7 @@ export class ChangeStream {
920
608
  for (let eventIndex = 0; eventIndex < events.length; eventIndex++) {
921
609
  const rawChangeDocument = events[eventIndex];
922
610
  const originalChangeDocument = parseChangeDocument(rawChangeDocument);
923
- if (this.abort_signal.aborted) {
611
+ if (this.abortSignal.aborted) {
924
612
  break;
925
613
  }
926
614
 
@@ -1027,11 +715,16 @@ export class ChangeStream {
1027
715
  // change stream events, collapse standalone checkpoints into the normal batch
1028
716
  // checkpoint flow to avoid commit churn under sustained load.
1029
717
  const hasBufferedChanges = eventIndex < events.length - 1;
1030
- if (waitForCheckpointLsn != null || hasBufferedChanges) {
1031
- if (waitForCheckpointLsn == null) {
1032
- waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
1033
- }
718
+ if (hasBufferedChanges && waitForCheckpointLsn == null) {
719
+ // Buffered changes - create a new batch checkpoint to rate limit commits
720
+ using _ = tracer.span('source_checkpoint');
721
+ waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
1034
722
  continue;
723
+ } else if (waitForCheckpointLsn != null) {
724
+ // Skip this checkpoint - wait for the batch checkpoint.
725
+ continue;
726
+ } else {
727
+ // No buffered changes, and no batch checkpoint pending - commit immediately.
1035
728
  }
1036
729
  } else if (!this.checkpointStreamId.equals(checkpointId)) {
1037
730
  continue;
@@ -1068,18 +761,20 @@ export class ChangeStream {
1068
761
  changeDocument.operationType == 'delete'
1069
762
  ) {
1070
763
  if (waitForCheckpointLsn == null) {
764
+ using _ = tracer.span('source_checkpoint');
1071
765
  waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
1072
766
  }
1073
767
 
1074
- const rel = getMongoRelation(changeDocument.ns);
1075
- const table = await this.getRelation(batch, rel, {
768
+ const rel = getMongoRelation(changeDocument.ns, this.connections.connectionTag);
769
+ const tables = await this.getRelations(batch, rel, {
1076
770
  // In most cases, we should not need to snapshot this. But if this is the first time we see the collection
1077
771
  // for whatever reason, then we do need to snapshot it.
1078
772
  // This may result in some duplicate operations when a collection is created for the first time after
1079
773
  // sync config was deployed.
1080
774
  snapshot: true
1081
775
  });
1082
- if (table.syncAny) {
776
+ const tablesToReplicate = tables.filter((table) => table.syncAny);
777
+ if (tablesToReplicate.length > 0) {
1083
778
  this.replicationLag.trackUncommittedChange(
1084
779
  changeDocument.clusterTime == null ? null : timestampToDate(changeDocument.clusterTime)
1085
780
  );
@@ -1094,29 +789,33 @@ export class ChangeStream {
1094
789
  transactionsReplicatedMetric.add(1);
1095
790
  }
1096
791
 
1097
- await this.writeChange(batch, table, changeDocument);
792
+ for (const table of tablesToReplicate) {
793
+ await this.writeChange(batch, table, changeDocument);
794
+ }
1098
795
  }
1099
796
  } else if (changeDocument.operationType == 'drop') {
1100
- const rel = getMongoRelation(changeDocument.ns);
1101
- const table = await this.getRelation(batch, rel, {
797
+ const rel = getMongoRelation(changeDocument.ns, this.connections.connectionTag);
798
+ const tables = await this.getRelations(batch, rel, {
1102
799
  // We're "dropping" this collection, so never snapshot it.
1103
800
  snapshot: false
1104
801
  });
1105
- if (table.syncAny) {
1106
- await batch.drop([table]);
1107
- this.relationCache.delete(table);
802
+ const tablesToDrop = tables.filter((table) => table.syncAny);
803
+ if (tablesToDrop.length > 0) {
804
+ await batch.drop(tablesToDrop);
1108
805
  }
806
+ this.relationCache.delete(rel);
1109
807
  } else if (changeDocument.operationType == 'rename') {
1110
- const relFrom = getMongoRelation(changeDocument.ns);
1111
- const relTo = getMongoRelation(changeDocument.to);
1112
- const tableFrom = await this.getRelation(batch, relFrom, {
808
+ const relFrom = getMongoRelation(changeDocument.ns, this.connections.connectionTag);
809
+ const relTo = getMongoRelation(changeDocument.to, this.connections.connectionTag);
810
+ const tablesFrom = await this.getRelations(batch, relFrom, {
1113
811
  // We're "dropping" this collection, so never snapshot it.
1114
812
  snapshot: false
1115
813
  });
1116
- if (tableFrom.syncAny) {
1117
- await batch.drop([tableFrom]);
1118
- this.relationCache.delete(relFrom);
814
+ const tablesToDrop = tablesFrom.filter((table) => table.syncAny);
815
+ if (tablesToDrop.length > 0) {
816
+ await batch.drop(tablesToDrop);
1119
817
  }
818
+ this.relationCache.delete(relFrom);
1120
819
  // Here we do need to snapshot the new table
1121
820
  const collection = await this.getCollectionInfo(relTo.schema, relTo.name);
1122
821
  await this.handleRelation(batch, relTo, {
@@ -1139,8 +838,8 @@ export class ChangeStream {
1139
838
  }
1140
839
 
1141
840
  batchSpan.end();
1142
- const durations = outerSpan.end();
1143
- const duration = batchSpan.endAt - batchSpan.startAt;
841
+ const durationsMicroseconds = outerSpan.end();
842
+ const duration = batchSpan.durationMillis;
1144
843
 
1145
844
  this.logger.info(
1146
845
  `Processed batch of ${events.length} changes / ${eventBatch.byteSize} bytes in ${duration}ms`,
@@ -1148,13 +847,15 @@ export class ChangeStream {
1148
847
  count: events.length,
1149
848
  bytes: eventBatch.byteSize,
1150
849
  duration,
1151
- t: durations
850
+ t: durationsMicroseconds
1152
851
  }
1153
852
  );
1154
853
  outerSpan = tracer.span('batch');
1155
854
  }
1156
855
  }
1157
856
  );
857
+
858
+ throw new ReplicationAbortedError(`Replication stream aborted`, this.abortSignal.reason);
1158
859
  }
1159
860
 
1160
861
  getReplicationLagMillis(): number | undefined {
@@ -1183,3 +884,26 @@ function transactionKey(doc: Pick<mongo.ChangeStreamDocument, 'lsid' | 'txnNumbe
1183
884
  }
1184
885
  return `${doc.lsid.id.toString('hex')}:${doc.txnNumber}`;
1185
886
  }
887
+
888
+ /**
889
+ * Prioritize errors that are _not_ ReplicationAbortedError. Any error on either loopPromise or
890
+ * streamPromise aborts the other one, which then results in a ReplicationAbortedError, hiding the
891
+ * original cause.
892
+ */
893
+ function replicationLoopError(results: PromiseSettledResult<any>[]): unknown {
894
+ // 1. Prioritize not ReplicationAbortedError.
895
+ for (const result of results) {
896
+ if (result.status == 'rejected' && !(result.reason instanceof ReplicationAbortedError)) {
897
+ return result.reason;
898
+ }
899
+ }
900
+ // 2. Fallback to ReplicationAbortedError.
901
+ for (const result of results) {
902
+ if (result.status == 'rejected') {
903
+ // At this point only ReplicationAbortedError remains
904
+ return result.reason;
905
+ }
906
+ }
907
+ // 3. Should never happen, but we cover this case.
908
+ return new ReplicationAssertionError(`Replication loop exited unexpectedly`);
909
+ }