@powersync/service-module-mongodb 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,729 @@
1
+ import { mongo } from '@powersync/lib-service-mongodb';
2
+ import { container, ErrorCode, Logger, ReplicationAbortedError, ServiceError } from '@powersync/lib-services-framework';
3
+ import {
4
+ InternalOpId,
5
+ MetricsEngine,
6
+ PerformanceTracer,
7
+ SaveOperationTag,
8
+ SourceEntityDescriptor,
9
+ SourceTable,
10
+ storage
11
+ } from '@powersync/service-core';
12
+ import { HydratedSyncConfig, TablePattern } from '@powersync/service-sync-rules';
13
+ import { ReplicationMetric } from '@powersync/service-types';
14
+ import { performance } from 'node:perf_hooks';
15
+ import { MongoLSN } from '../common/MongoLSN.js';
16
+ import { PostImagesOption } from '../types/types.js';
17
+ import { escapeRegExp } from '../utils.js';
18
+ import { MongoManager } from './MongoManager.js';
19
+ import { createCheckpoint, getMongoRelation, STANDALONE_CHECKPOINT_ID } from './MongoRelation.js';
20
+ import { ChunkedSnapshotQuery } from './MongoSnapshotQuery.js';
21
+ import { ChangeStreamBatch, parseChangeDocument, rawChangeStream } from './RawChangeStream.js';
22
+ import { CHECKPOINTS_COLLECTION } from './replication-utils.js';
23
+ import { DirectSourceRowConverter, SourceRowConverter } from './SourceRowConverter.js';
24
+
25
/**
 * Construction options for {@link MongoSnapshotter}.
 *
 * Required members are listed first, followed by optional tuning knobs and hooks.
 */
export interface MongoSnapshotterOptions {
  /** Connection manager; its `client`, `db`, `options` and `connectionTag` are read by the snapshotter. */
  connections: MongoManager;
  /** Bucket storage that the snapshot is written to, and that provides the parsed sync rules. */
  storage: storage.SyncRulesBucketStorage;
  /** Metrics engine used to look up the replication counters (rows, bytes, chunks). */
  metrics: MetricsEngine;
  /** Aborting this signal stops the replication loop and any in-progress table snapshot. */
  abortSignal: AbortSignal;
  /** `_id` of the checkpoint document written and matched while obtaining the snapshot LSN. */
  checkpointStreamId: mongo.ObjectId;

  /** Default `maxAwaitTimeMS` for change streams. The snapshotter falls back to 10_000 when omitted. */
  maxAwaitTimeMS?: number;
  /** Batch size for snapshot queries and change streams. The snapshotter falls back to 6_000 when omitted. */
  snapshotChunkLength?: number;
  /** Logger to use. The snapshotter falls back to `storage.logger` when omitted. */
  logger?: Logger;
  /** Hooks forwarded to the storage writer created for each queued table snapshot. */
  storageHooks?: storage.StorageHooks;
  /** Hooks invoked by the snapshotter itself. */
  snapshotHooks?: MongoSnapshotterHooks;
}
37
+
38
/**
 * Optional callbacks invoked by {@link MongoSnapshotter}.
 */
export interface MongoSnapshotterHooks {
  /**
   * Awaited with the queued table before the snapshotter starts replicating it
   * in the background replication loop.
   */
  beforeSnapshotStarted?: (table: SourceTable) => Promise<void>;
}
41
+
42
/**
 * Outcome of checking the persisted replication status before starting.
 */
interface InitResult {
  /**
   * Snapshot LSN persisted by an earlier, unfinished attempt, if any.
   * Always `null` when `needsInitialSync` is `false`.
   */
  snapshotLsn: string | null;
  /** `true` when the initial snapshot still has to be (re)started or resumed. */
  needsInitialSync: boolean;
}
46
+
47
/**
 * One pending table snapshot in the snapshotter's queue.
 */
interface SnapshotQueueItem {
  /** Awaited by the replication loop before the item is processed. */
  ready: Promise<void>;
  /** When `true`, the replication loop removes the item without replicating the table. */
  cancelled: boolean;
  /** The table to snapshot. */
  table: SourceTable;
}
52
+
53
/**
 * Snapshots MongoDB collections into bucket storage.
 *
 * Tables are placed on an internal queue (either up-front via {@link queueSnapshotTables}, or later via
 * {@link snapshotTables}) and processed one at a time by {@link replicationLoop}. Once the queue drains,
 * the snapshot is marked as done in storage.
 */
export class MongoSnapshotter {
  private readonly storage: storage.SyncRulesBucketStorage;
  private readonly metrics: MetricsEngine;
  private readonly connections: MongoManager;
  private readonly client: mongo.MongoClient;
  private readonly defaultDb: mongo.Db;
  private readonly syncRules: HydratedSyncConfig;
  private readonly sourceRowConverter: SourceRowConverter;
  private readonly maxAwaitTimeMS: number;
  private readonly snapshotChunkLength: number;
  private readonly abortSignal: AbortSignal;
  private readonly logger: Logger;
  private readonly checkpointStreamId: mongo.ObjectId;
  private readonly storageHooks: storage.StorageHooks | undefined;
  private readonly snapshotHooks: MongoSnapshotterHooks | undefined;
  private readonly changeStreamTimeout: number;

  /** Passed as `connection_id` when resolving tables in storage. */
  private readonly connectionId = 1;
  /** Pending table snapshots. The replication loop always takes the first (oldest) entry. */
  private readonly queue = new Set<SnapshotQueueItem>();
  /** Resolved the first time the replication loop finds the queue empty; rejected if the loop fails first. */
  private initialSnapshotDone = Promise.withResolvers<void>();
  /** Set while the replication loop is idle; resolved when a table is queued or on abort. */
  private nextItemQueued: PromiseWithResolvers<void> | null = null;
  /** Last op id flushed by a snapshot write, used as `maxOpId` when populating the checksum cache. */
  private lastSnapshotOpId: InternalOpId | null = null;
  /** `performance.now()` timestamp of the last probe touch, used to throttle {@link touch}. */
  private lastTouchedAt = performance.now();

  /**
   * @param options connections, storage, metrics and tuning options; see {@link MongoSnapshotterOptions}.
   */
  constructor(options: MongoSnapshotterOptions) {
    this.storage = options.storage;
    this.metrics = options.metrics;
    this.connections = options.connections;
    this.client = options.connections.client;
    this.defaultDb = options.connections.db;
    this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 10_000;
    this.snapshotChunkLength = options.snapshotChunkLength ?? 6_000;
    this.abortSignal = options.abortSignal;
    this.logger = options.logger ?? options.storage.logger;
    this.checkpointStreamId = options.checkpointStreamId;
    this.storageHooks = options.storageHooks;
    this.snapshotHooks = options.snapshotHooks;
    // Used as maxTimeMS for change streams: 90% of the client's socket timeout.
    this.changeStreamTimeout = Math.ceil(this.client.options.socketTimeoutMS * 0.9);
    this.syncRules = options.storage.getParsedSyncRules({
      defaultSchema: this.defaultDb.databaseName
    });
    this.sourceRowConverter = new DirectSourceRowConverter(this.syncRules.compatibility);

    // Wake up an idle replication loop on abort, so that it can observe the aborted signal and exit.
    this.abortSignal.addEventListener('abort', () => {
      this.nextItemQueued?.resolve();
    });
  }

  /** True unless the `postImages` connection option is `OFF`. */
  private get usePostImages() {
    return this.connections.options.postImages != PostImagesOption.OFF;
  }

  /** True when the `postImages` connection option is `AUTO_CONFIGURE`. */
  private get configurePostImages() {
    return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE;
  }

  /**
   * Mirrors the storage's `softDeleteCurrentData` setting.
   *
   * When true, {@link snapshotTables} queues snapshots for background processing instead of running them inline.
   */
  public get supportsConcurrentSnapshots() {
    return this.storage.storageConfig.softDeleteCurrentData;
  }

  /**
   * Check the persisted replication status.
   *
   * @returns `needsInitialSync: false` when storage reports the snapshot as done and has a checkpoint LSN;
   * otherwise `needsInitialSync: true` together with the persisted snapshot LSN (if any).
   */
  async checkSlot(): Promise<InitResult> {
    const status = await this.storage.getStatus();
    if (status.snapshot_done && status.checkpoint_lsn) {
      this.logger.info(`Initial replication already done`);
      return { needsInitialSync: false, snapshotLsn: null };
    }

    return { needsInitialSync: true, snapshotLsn: status.snapshot_lsn };
  }

  /**
   * Ensure the checkpoints collection exists in the default database, and start with it empty.
   *
   * The collection is created if missing, dropped and recreated if postImages are in use but not enabled
   * on it, and cleared otherwise.
   */
  async setupCheckpointsCollection() {
    const collection = await this.getCollectionInfo(this.defaultDb.databaseName, CHECKPOINTS_COLLECTION);
    if (collection == null) {
      await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, {
        changeStreamPreAndPostImages: { enabled: true }
      });
    } else if (this.usePostImages && collection.options?.changeStreamPreAndPostImages?.enabled != true) {
      // Drop + create requires less permissions than collMod,
      // and we don't care about the data in this collection.
      await this.defaultDb.dropCollection(CHECKPOINTS_COLLECTION);
      await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, {
        changeStreamPreAndPostImages: { enabled: true }
      });
    } else {
      // Clear the collection on startup, to keep it clean
      // We never query this collection directly, and don't want to keep the data around.
      // We only use this to get data into the oplog/changestream.
      await this.defaultDb.collection(CHECKPOINTS_COLLECTION).deleteMany({});
    }
  }

  /**
   * Resolve all source tables from the sync rules and queue those that still need a snapshot.
   *
   * @param snapshotLsn the persisted snapshot LSN to resume from, or `null` on the first attempt, in which
   * case a new snapshot LSN is obtained and stored as the resume LSN.
   */
  async queueSnapshotTables(snapshotLsn: string | null) {
    await this.client.connect();
    await using writer = await this.storage.createWriter({
      zeroLSN: MongoLSN.ZERO.comparable,
      defaultSchema: this.defaultDb.databaseName,
      storeCurrentData: false,
      skipExistingRows: true,
      tracer: new PerformanceTracer('MongoDB initial snapshot setup')
    });
    if (snapshotLsn == null) {
      // First replication attempt - get a snapshot and store the timestamp
      snapshotLsn = await this.getSnapshotLsn();
      await writer.setResumeLsn(snapshotLsn);
      this.logger.info(`Marking snapshot at ${snapshotLsn}`);
    } else {
      this.logger.info(`Resuming snapshot at ${snapshotLsn}`);
      // Check that the snapshot is still valid.
      await this.validateSnapshotLsn(snapshotLsn);
    }

    // Start by resolving all tables.
    // This checks postImage configuration, and that should fail as
    // early as possible.
    const allSourceTables: SourceTable[] = [];
    for (const tablePattern of this.syncRules.getSourceTables()) {
      allSourceTables.push(...(await this.resolveQualifiedTableNames(writer, tablePattern)));
    }

    for (const table of allSourceTables) {
      if (table.snapshotComplete) {
        this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`);
        continue;
      }
      const count = await this.estimatedCountNumber(table);
      const updated = await writer.updateTableProgress(table, {
        totalEstimatedCount: count
      });
      this.queueTable(updated);
      this.logger.info(
        `To replicate: ${updated.qualifiedName}: ${updated.snapshotStatus?.replicatedCount}/~${updated.snapshotStatus?.totalEstimatedCount}`
      );
    }
  }

  /**
   * Wait until the replication loop has drained the queue for the first time.
   *
   * Rejects if the replication loop fails (or is aborted) before that point.
   */
  async waitForInitialSnapshot() {
    await this.initialSnapshotDone.promise;
  }

  /**
   * Process queued table snapshots until the abort signal fires.
   *
   * This never returns normally: it throws a `ReplicationAbortedError` on abort, or rethrows the first
   * error raised while snapshotting.
   */
  async replicationLoop() {
    try {
      if (this.queue.size == 0) {
        // Special case where we start with no tables to snapshot
        await this.markSnapshotDone();
      }
      while (!this.abortSignal.aborted) {
        const item = this.queue.values().next().value;
        if (item == null) {
          // Queue drained: signal waiters, then sleep until a new table is queued or we are aborted.
          this.initialSnapshotDone.resolve();
          this.nextItemQueued = Promise.withResolvers<void>();
          await this.nextItemQueued.promise;
          this.nextItemQueued = null;
          continue;
        }

        await item.ready;
        if (!item.cancelled) {
          await this.replicateTable(item.table);
        }
        this.queue.delete(item);
        if (this.queue.size == 0) {
          await this.markSnapshotDone();
        }
      }
      throw new ReplicationAbortedError(`Replication snapshotter aborted`, this.abortSignal.reason);
    } catch (e) {
      // Mark the promise as handled before rejecting it. Without this, a failure that happens before anyone
      // has called waitForInitialSnapshot() would surface as a process-level unhandled rejection.
      // Callers that await the promise (now or later) still observe the rejection.
      this.initialSnapshotDone.promise.catch(() => {});
      // If initial snapshot already completed, this has no effect
      this.initialSnapshotDone.reject(e);
      throw e;
    }
  }

  /**
   * Queue a single table for a background snapshot, and record in storage that it requires one.
   *
   * The queue item only becomes ready once storage has been updated. If that update fails, the item is
   * cancelled so that the replication loop skips it, and the error is rethrown.
   */
  private async queueSnapshot(batch: storage.BucketStorageBatch, table: storage.SourceTable) {
    const ready = Promise.withResolvers<void>();
    const item = this.queueTable(table, ready.promise);
    try {
      await batch.markTableSnapshotRequired(table);
      ready.resolve();
    } catch (e) {
      item.cancelled = true;
      ready.resolve();
      throw e;
    } finally {
      this.nextItemQueued?.resolve();
    }
  }

  /**
   * Snapshot tables.
   *
   * If concurrency is supported, the snapshots are queued and processed in the background.
   * Otherwise, snapshots are processed inline.
   */
  async snapshotTables(batch: storage.BucketStorageBatch, tables: storage.SourceTable[]): Promise<void> {
    if (this.supportsConcurrentSnapshots) {
      // Queue concurrent snapshots
      for (const tableToSnapshot of tables) {
        await this.queueSnapshot(batch, tableToSnapshot);
      }
    } else {
      // No concurrency supported - snapshot inline
      // Truncate in case a previous inline snapshot was interrupted after flushing rows, but before
      // recording snapshot progress. Without this, resuming can replay already-flushed rows on v1/v2 storage.
      await batch.truncate(tables);

      for (const table of tables) {
        await this.snapshotTable(batch, table);
      }
      const noCheckpointBefore = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);

      await batch.markTableSnapshotDone(tables, noCheckpointBefore);
    }
  }

  /**
   * Add a table to the queue and wake up the replication loop if it is idle.
   *
   * @param ready awaited by the replication loop before the table is processed; defaults to "ready now".
   * @returns the queue item, so that the caller can cancel it.
   */
  private queueTable(table: SourceTable, ready = Promise.resolve()) {
    const item: SnapshotQueueItem = { table, ready, cancelled: false };
    this.queue.add(item);
    this.nextItemQueued?.resolve();
    return item;
  }

  /**
   * Mark the snapshot as done in storage, provided the queue is (still) empty.
   *
   * The queue is re-checked after every async step, since new tables may be queued concurrently; in that
   * case this returns early and is attempted again after the next snapshot.
   */
  private async markSnapshotDone() {
    if (this.queue.size != 0) {
      return;
    }

    const status = await this.storage.getStatus();
    if (status.snapshot_done) {
      return;
    }

    const lastOp = this.lastSnapshotOpId ?? status.keepalive_op;
    if (lastOp != null) {
      // Populate the cache _after_ initial replication, but _before_ we switch to this replication stream.
      // Keeping snapshot_done false until this completes makes this resumable after interruption.
      await this.storage.populatePersistentChecksumCache({
        // No checkpoint yet, but we do have the opId.
        maxOpId: lastOp,
        signal: this.abortSignal
      });
    }

    if (this.queue.size != 0) {
      return;
    }

    await using writer = await this.storage.createWriter({
      logger: this.logger,
      zeroLSN: MongoLSN.ZERO.comparable,
      defaultSchema: this.defaultDb.databaseName,
      storeCurrentData: false,
      skipExistingRows: true
    });

    // The checkpoint here is a marker - we need to replicate up to at least this
    // point before the data can be considered consistent.
    const checkpoint = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);
    if (this.queue.size != 0) {
      return;
    }

    await writer.markSnapshotDone(checkpoint, {
      // If there is a conflict, we'll try again after the next snapshot
      throwOnConflict: false
    });
    // KLUDGE: We need to create an extra checkpoint _after_ marking the snapshot done, to fix
    // issues with order of processing commits(). This is picked up by tests on postgres storage,
    // the issue may be specific to that storage engine.
    await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);
  }

  /**
   * Snapshot one queued table using its own storage writer, then mark the table snapshot as done and commit.
   *
   * Does nothing (after invoking the `beforeSnapshotStarted` hook) if the table no longer exists in storage
   * or its snapshot is already complete.
   */
  private async replicateTable(tableRequest: SourceTable) {
    await this.snapshotHooks?.beforeSnapshotStarted?.(tableRequest);

    await using writer = await this.storage.createWriter({
      logger: this.logger,
      zeroLSN: MongoLSN.ZERO.comparable,
      defaultSchema: this.defaultDb.databaseName,
      storeCurrentData: false,
      skipExistingRows: true,
      hooks: this.storageHooks,
      tracer: new PerformanceTracer('MongoDB snapshot table')
    });
    // Get fresh table info, in case it was updated while queuing.
    // This deliberately does not resolve by namespace, since that could recreate a replacement source table
    // for a dropped/recreated collection and leave the original queued snapshot with no owner.
    const table = await writer.getSourceTableStatus(tableRequest);
    if (table == null || table.snapshotComplete) {
      return;
    }

    await this.snapshotTable(writer, table);
    const noCheckpointBefore = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);
    await writer.markTableSnapshotDone([table], noCheckpointBefore);

    // This commit ensures we set keepalive_op.
    const resumeLsn = writer.resumeFromLsn ?? MongoLSN.ZERO.comparable;
    await writer.commit(resumeLsn);

    if (writer.last_flushed_op != null) {
      this.lastSnapshotOpId = writer.last_flushed_op;
    }
    this.logger.info(`Flushed snapshot at ${writer.last_flushed_op}`);
  }

  /**
   * Resolve a sync-rules table pattern to the matching source tables.
   *
   * Patterns for a different connection tag resolve to no tables. Wildcard patterns match collections by
   * name prefix; other patterns match by exact name, and log a warning if the collection does not exist.
   */
  private async resolveQualifiedTableNames(
    batch: storage.BucketStorageBatch,
    tablePattern: TablePattern
  ): Promise<storage.SourceTable[]> {
    const schema = tablePattern.schema;
    if (tablePattern.connectionTag != this.connections.connectionTag) {
      return [];
    }

    const nameFilter = tablePattern.isWildcard
      ? new RegExp('^' + escapeRegExp(tablePattern.tablePrefix))
      : tablePattern.name;
    // Check if the collection exists
    const collections = await this.client
      .db(schema)
      .listCollections({ name: nameFilter }, { nameOnly: false })
      .toArray();

    if (!tablePattern.isWildcard && collections.length == 0) {
      this.logger.warn(`Collection ${schema}.${tablePattern.name} not found`);
    }

    const result: storage.SourceTable[] = [];
    for (const collection of collections) {
      result.push(
        ...(await this.handleRelation(
          batch,
          getMongoRelation({ db: schema, coll: collection.name }, this.connections.connectionTag),
          {
            collectionInfo: collection
          }
        ))
      );
    }

    return result;
  }

  /**
   * Copy all documents of one collection into storage, chunk by chunk.
   *
   * Resumes after `table.snapshotStatus.lastKey` when present. After each chunk the batch is flushed and
   * the table progress is persisted. The next chunk is fetched while the current one is being written.
   *
   * @throws ReplicationAbortedError if the abort signal fires between chunks.
   */
  private async snapshotTable(batch: storage.BucketStorageBatch, table: storage.SourceTable) {
    const rowsReplicatedMetric = this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED);
    const bytesReplicatedMetric = this.metrics.getCounter(ReplicationMetric.DATA_REPLICATED_BYTES);
    const chunksReplicatedMetric = this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED);

    const totalEstimatedCount = await this.estimatedCountNumber(table);
    let at = table.snapshotStatus?.replicatedCount ?? 0;
    const collection = this.client.db(table.schema).collection(table.name);
    await using query = new ChunkedSnapshotQuery({
      collection,
      key: table.snapshotStatus?.lastKey,
      batchSize: this.snapshotChunkLength
    });
    if (query.lastKey != null) {
      this.logger.info(
        `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming at _id > ${query.lastKey}`
      );
    } else {
      this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
    }

    let lastBatch = performance.now();
    let nextChunkPromise = query.nextChunk();
    try {
      while (true) {
        const { docs: docBatch, lastKey, bytes: chunkBytes } = await nextChunkPromise;
        if (docBatch.length == 0) {
          // No more data - stop iterating
          break;
        }
        bytesReplicatedMetric.add(chunkBytes);
        chunksReplicatedMetric.add(1);

        if (this.abortSignal.aborted) {
          throw new ReplicationAbortedError(`Aborted initial replication`, this.abortSignal.reason);
        }

        // Pre-fetch next batch, so that we can read and write concurrently
        nextChunkPromise = query.nextChunk();
        for (const buffer of docBatch) {
          const { row, replicaId } = this.sourceRowConverter.rawToSqliteRow(buffer);
          // This auto-flushes when the batch reaches its size limit
          await batch.save({
            tag: SaveOperationTag.INSERT,
            sourceTable: table,
            before: undefined,
            beforeReplicaId: undefined,
            after: row,
            afterReplicaId: replicaId
          });
        }

        // Important: flush before marking progress
        const result = await batch.flush();
        if (result?.flushed_op != null) {
          this.lastSnapshotOpId = result.flushed_op;
        }
        at += docBatch.length;
        rowsReplicatedMetric.add(docBatch.length);

        table = await batch.updateTableProgress(table, {
          lastKey,
          replicatedCount: at,
          totalEstimatedCount
        });

        const duration = performance.now() - lastBatch;
        lastBatch = performance.now();
        this.logger.info(
          `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} in ${duration.toFixed(0)}ms`
        );
        this.touch();
      }
    } finally {
      // In case the loop was interrupted, make sure we await the last promise.
      // This must also happen when the loop exits with an error: otherwise a pre-fetched chunk is left
      // in flight while the query is disposed, and its failure would become an unhandled rejection.
      // Errors are ignored here: either this promise was already awaited inside the loop (and any error
      // is already propagating), or a different error is propagating and takes precedence.
      try {
        await nextChunkPromise;
      } catch {
        // Ignored - see above.
      }
    }
  }

  /**
   * Resolve the storage-side source tables for one collection, dropping any conflicting tables.
   *
   * @param options.collectionInfo collection metadata used for the postImages check; the check is skipped
   * when it is not available.
   * @throws ServiceError if postImages are required but not enabled on the collection.
   */
  private async handleRelation(
    batch: storage.BucketStorageBatch,
    descriptor: SourceEntityDescriptor,
    options: { collectionInfo: mongo.CollectionInfo | undefined }
  ): Promise<SourceTable[]> {
    if (options.collectionInfo != null) {
      await this.checkPostImages(descriptor.schema, options.collectionInfo);
    } else {
      // If collectionInfo is null, the collection may have been dropped.
      // Ignore the postImages check in this case.
    }

    const result = await batch.resolveTables({
      connection_id: this.connectionId,
      source: descriptor,
      syncRules: this.syncRules
    });

    // Drop conflicting collections.
    // This is generally not expected for MongoDB source dbs, so we log an error.
    if (result.dropTables.length > 0) {
      this.logger.error(
        `Conflicting collections found for ${JSON.stringify(descriptor)}. Dropping: ${result.dropTables.map((t) => t.id).join(', ')}`
      );
      await batch.drop(result.dropTables);
    }

    return result.tables;
  }

  /** Estimated number of documents in the table's collection, as reported by `estimatedDocumentCount()`. */
  private async estimatedCountNumber(table: storage.SourceTable): Promise<number> {
    return await this.client.db(table.schema).collection(table.name).estimatedDocumentCount();
  }

  /** Full collection info (including options) for `db.name`, or `undefined` if no such collection is listed. */
  private async getCollectionInfo(db: string, name: string): Promise<mongo.CollectionInfo | undefined> {
    return (await this.client.db(db).listCollections({ name }, { nameOnly: false }).toArray())[0];
  }

  /**
   * Verify that postImages are enabled on a collection, when postImages are in use.
   *
   * With `AUTO_CONFIGURE`, postImages are enabled via `collMod` if needed.
   *
   * @throws ServiceError (PSYNC_S1343) if postImages are not enabled and are not auto-configured.
   */
  private async checkPostImages(db: string, collectionInfo: mongo.CollectionInfo) {
    if (!this.usePostImages) {
      // Nothing to check
      return;
    }

    const enabled = collectionInfo.options?.changeStreamPreAndPostImages?.enabled == true;
    if (!enabled && this.configurePostImages) {
      await this.client.db(db).command({
        collMod: collectionInfo.name,
        changeStreamPreAndPostImages: { enabled: true }
      });
      this.logger.info(`Enabled postImages on ${db}.${collectionInfo.name}`);
    } else if (!enabled) {
      throw new ServiceError(ErrorCode.PSYNC_S1343, `postImages not enabled on ${db}.${collectionInfo.name}`);
    }
  }

  /**
   * Obtain an LSN (cluster time + resume token) to start the snapshot from.
   *
   * Writes a checkpoint document and streams changes until that document is seen, returning the LSN of
   * that change event.
   *
   * @throws ServiceError for sharded clusters (PSYNC_S1341), standalone instances (PSYNC_S1342), or if the
   * checkpoint document is not seen in time (PSYNC_S1301).
   */
  private async getSnapshotLsn(): Promise<string> {
    const hello = await this.defaultDb.command({ hello: 1 });
    // Basic sanity check
    if (hello.msg == 'isdbgrid') {
      throw new ServiceError(
        ErrorCode.PSYNC_S1341,
        'Sharded MongoDB Clusters are not supported yet (including MongoDB Serverless instances).'
      );
    } else if (hello.setName == null) {
      throw new ServiceError(
        ErrorCode.PSYNC_S1342,
        'Standalone MongoDB instances are not supported - use a replicaset.'
      );
    }

    // Open a change stream just to get a resume token for later use.
    // We could use clusterTime from the hello command, but that won't tell us if the
    // snapshot isn't valid anymore.
    // If we just use the first resumeToken from the stream, we get two potential issues:
    // 1. The resumeToken may just be a wrapped clusterTime, which does not detect changes
    //    in source db or other stream issues.
    // 2. The first actual change we get may have the same clusterTime, causing us to incorrectly
    //    skip that event.
    // Instead, we create a new checkpoint document, and wait until we get that document back in the stream.
    // To avoid potential race conditions with the checkpoint creation, we create a new checkpoint document
    // periodically until the timeout is reached.

    const LSN_TIMEOUT_SECONDS = 60;
    const LSN_CREATE_INTERVAL_SECONDS = 1;

    const firstCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
    const filters = this.getSourceNamespaceFilters();
    const iter = this.rawChangeStreamBatches({
      lsn: firstCheckpointLsn,
      maxAwaitTimeMS: 0,
      signal: this.abortSignal,
      filters
    });
    const startTime = performance.now();
    let lastCheckpointCreated = performance.now();
    let eventsSeen = 0;
    let batchesSeen = 0;

    for await (const { events } of iter) {
      if (performance.now() - startTime >= LSN_TIMEOUT_SECONDS * 1000) {
        break;
      }
      if (performance.now() - lastCheckpointCreated >= LSN_CREATE_INTERVAL_SECONDS * 1000) {
        await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
        lastCheckpointCreated = performance.now();
      }
      batchesSeen += 1;

      for (const rawChangeDocument of events) {
        const changeDocument = parseChangeDocument(rawChangeDocument);
        const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;

        if (ns?.coll == CHECKPOINTS_COLLECTION && 'documentKey' in changeDocument) {
          const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId;
          if (!this.checkpointStreamId.equals(checkpointId)) {
            // A checkpoint document with a different id - not ours.
            continue;
          }
          return new MongoLSN({
            timestamp: changeDocument.clusterTime!,
            resume_token: changeDocument._id
          }).comparable;
        }

        eventsSeen += 1;
      }
    }

    // Could happen if there is a very large replication lag?
    throw new ServiceError(
      ErrorCode.PSYNC_S1301,
      `Timeout after while waiting for checkpoint document for ${LSN_TIMEOUT_SECONDS}s. Streamed events = ${eventsSeen}, batches = ${batchesSeen}`
    );
  }

  /**
   * Given a snapshot LSN, validate that we can read from it, by opening a change stream.
   */
  private async validateSnapshotLsn(lsn: string) {
    const stream = this.rawChangeStreamBatches({
      lsn,
      maxAwaitTimeMS: 0,
      filters: this.getSourceNamespaceFilters()
    });
    // Reading a single batch is sufficient; errors from opening the stream propagate to the caller.
    for await (const _batch of stream) {
      break;
    }
  }

  /**
   * Build the change stream `$match` filter covering the checkpoints collection and every source table
   * pattern for this connection.
   *
   * @returns the filter, plus `multipleDatabases: true` if any pattern refers to a database other than
   * the default one.
   */
  private getSourceNamespaceFilters(): { $match: any; multipleDatabases: boolean } {
    const sourceTables = this.syncRules.getSourceTables();

    const inFilters: { db: string; coll: string }[] = [
      { db: this.defaultDb.databaseName, coll: CHECKPOINTS_COLLECTION }
    ];
    const regexFilters: { 'ns.db': string; 'ns.coll': RegExp }[] = [];
    let multipleDatabases = false;
    for (const tablePattern of sourceTables) {
      if (tablePattern.connectionTag != this.connections.connectionTag) {
        continue;
      }

      if (tablePattern.schema != this.defaultDb.databaseName) {
        multipleDatabases = true;
      }

      if (tablePattern.isWildcard) {
        regexFilters.push({
          'ns.db': tablePattern.schema,
          'ns.coll': new RegExp('^' + escapeRegExp(tablePattern.tablePrefix))
        });
      } else {
        inFilters.push({
          db: tablePattern.schema,
          coll: tablePattern.name
        });
      }
    }

    // With a single database, only the collection name is matched.
    const nsFilter = multipleDatabases
      ? { ns: { $in: inFilters } }
      : { 'ns.coll': { $in: inFilters.map((ns) => ns.coll) } };
    if (regexFilters.length > 0) {
      return { $match: { $or: [nsFilter, ...regexFilters] }, multipleDatabases };
    }
    return { $match: nsFilter, multipleDatabases };
  }

  /**
   * Open a raw change stream, yielding batches of change events.
   *
   * @param options.lsn serialized LSN to resume from, or `null`.
   * @param options.maxAwaitTimeMS overrides the snapshotter's default `maxAwaitTimeMS`.
   * @param options.batchSize overrides the default batch size (`snapshotChunkLength`).
   * @param options.filters result of {@link getSourceNamespaceFilters}.
   */
  private rawChangeStreamBatches(options: {
    lsn: string | null;
    maxAwaitTimeMS?: number;
    batchSize?: number;
    filters: { $match: any; multipleDatabases: boolean };
    signal?: AbortSignal;
    tracer?: PerformanceTracer<'changestream'>;
  }): AsyncIterableIterator<ChangeStreamBatch> {
    const lastLsn = options.lsn ? MongoLSN.fromSerialized(options.lsn) : null;
    const startAfter = lastLsn?.timestamp;
    const resumeAfter = lastLsn?.resumeToken;

    let fullDocument: 'required' | 'updateLookup';
    if (this.usePostImages) {
      // 'read_only' or 'auto_configure'
      // Configuration happens during snapshot, or when we see new
      // collections.
      fullDocument = 'required';
    } else {
      fullDocument = 'updateLookup';
    }
    const streamOptions: mongo.ChangeStreamOptions & mongo.Document = {
      showExpandedEvents: true,
      fullDocument
    };
    // Note: streamOptions is referenced by the pipeline, so the assignments below are included.
    const pipeline: mongo.Document[] = [
      { $changeStream: streamOptions },
      { $match: options.filters.$match },
      { $changeStreamSplitLargeEvent: {} }
    ];

    // Only one of these options can be supplied at a time.
    if (resumeAfter) {
      streamOptions.resumeAfter = resumeAfter;
    } else {
      // Legacy: We don't persist lsns without resumeTokens anymore, but we do still handle the
      // case if we have an old one.
      streamOptions.startAtOperationTime = startAfter;
    }

    let watchDb: mongo.Db;
    if (options.filters.multipleDatabases) {
      // Requires readAnyDatabase@admin on Atlas
      watchDb = this.client.db('admin');
      streamOptions.allChangesForCluster = true;
    } else {
      // Same general result, but requires less permissions than the above
      watchDb = this.defaultDb;
    }

    return rawChangeStream(watchDb, pipeline, {
      batchSize: options.batchSize ?? this.snapshotChunkLength,
      maxAwaitTimeMS: options.maxAwaitTimeMS ?? this.maxAwaitTimeMS,
      maxTimeMS: this.changeStreamTimeout,
      signal: options.signal,
      logger: this.logger,
      tracer: options.tracer
    });
  }

  /** Touch the container probes, at most once per second, without waiting for the result. */
  private touch() {
    if (performance.now() - this.lastTouchedAt > 1_000) {
      this.lastTouchedAt = performance.now();
      // Update the probes, but don't wait for it
      container.probes.touch().catch((e) => {
        this.logger.error(`Failed to touch the container probe: ${e.message}`, e);
      });
    }
  }
}