@powersync/service-module-mongodb 0.15.4 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/CHANGELOG.md +67 -0
  2. package/dist/api/MongoRouteAPIAdapter.js +12 -21
  3. package/dist/api/MongoRouteAPIAdapter.js.map +1 -1
  4. package/dist/replication/ChangeStream.d.ts +23 -42
  5. package/dist/replication/ChangeStream.js +363 -600
  6. package/dist/replication/ChangeStream.js.map +1 -1
  7. package/dist/replication/ChangeStreamReplicationJob.js +2 -2
  8. package/dist/replication/ChangeStreamReplicationJob.js.map +1 -1
  9. package/dist/replication/JsonBufferWriter.d.ts +80 -0
  10. package/dist/replication/JsonBufferWriter.js +342 -0
  11. package/dist/replication/JsonBufferWriter.js.map +1 -0
  12. package/dist/replication/MongoRelation.d.ts +1 -1
  13. package/dist/replication/MongoRelation.js +45 -21
  14. package/dist/replication/MongoRelation.js.map +1 -1
  15. package/dist/replication/MongoSnapshotQuery.d.ts +1 -1
  16. package/dist/replication/MongoSnapshotQuery.js +6 -3
  17. package/dist/replication/MongoSnapshotQuery.js.map +1 -1
  18. package/dist/replication/MongoSnapshotter.d.ts +81 -0
  19. package/dist/replication/MongoSnapshotter.js +594 -0
  20. package/dist/replication/MongoSnapshotter.js.map +1 -0
  21. package/dist/replication/RawChangeStream.d.ts +55 -0
  22. package/dist/replication/RawChangeStream.js +322 -0
  23. package/dist/replication/RawChangeStream.js.map +1 -0
  24. package/dist/replication/SourceRowConverter.d.ts +46 -0
  25. package/dist/replication/SourceRowConverter.js +42 -0
  26. package/dist/replication/SourceRowConverter.js.map +1 -0
  27. package/dist/replication/bufferToSqlite.d.ts +43 -0
  28. package/dist/replication/bufferToSqlite.js +740 -0
  29. package/dist/replication/bufferToSqlite.js.map +1 -0
  30. package/dist/replication/internal-mongodb-utils.d.ts +0 -12
  31. package/dist/replication/internal-mongodb-utils.js +0 -54
  32. package/dist/replication/internal-mongodb-utils.js.map +1 -1
  33. package/dist/replication/replication-index.d.ts +2 -0
  34. package/dist/replication/replication-index.js +2 -0
  35. package/dist/replication/replication-index.js.map +1 -1
  36. package/package.json +11 -11
  37. package/scripts/benchmark-change-document-json.mts +358 -0
  38. package/scripts/benchmark-change-document.mts +370 -0
  39. package/src/api/MongoRouteAPIAdapter.ts +13 -21
  40. package/src/replication/ChangeStream.ts +421 -720
  41. package/src/replication/ChangeStreamReplicationJob.ts +2 -2
  42. package/src/replication/JsonBufferWriter.ts +390 -0
  43. package/src/replication/MongoRelation.ts +54 -25
  44. package/src/replication/MongoSnapshotQuery.ts +8 -5
  45. package/src/replication/MongoSnapshotter.ts +729 -0
  46. package/src/replication/RawChangeStream.ts +460 -0
  47. package/src/replication/SourceRowConverter.ts +65 -0
  48. package/src/replication/bufferToSqlite.ts +944 -0
  49. package/src/replication/internal-mongodb-utils.ts +0 -65
  50. package/src/replication/replication-index.ts +2 -0
  51. package/test/src/buffer_to_sqlite.test.ts +1146 -0
  52. package/test/src/change_stream.test.ts +259 -19
  53. package/test/src/change_stream_utils.ts +28 -27
  54. package/test/src/checkpoint_retry.test.ts +131 -0
  55. package/test/src/mongo_test.test.ts +66 -64
  56. package/test/src/parse_document_id.test.ts +54 -0
  57. package/test/src/raw_change_stream.test.ts +547 -0
  58. package/test/src/resume.test.ts +12 -2
  59. package/test/src/resuming_snapshots.test.ts +10 -6
  60. package/test/src/util.ts +56 -3
  61. package/test/tsconfig.json +0 -1
  62. package/tsconfig.scripts.json +13 -0
  63. package/tsconfig.tsbuildinfo +1 -1
  64. package/test/src/internal_mongodb_utils.test.ts +0 -103
@@ -1,14 +1,16 @@
1
- import { isMongoNetworkTimeoutError, isMongoServerError, mongo } from '@powersync/lib-service-mongodb';
2
- import { container, DatabaseConnectionError, logger as defaultLogger, ErrorCode, ReplicationAbortedError, ReplicationAssertionError, ServiceError } from '@powersync/lib-services-framework';
3
- import { RelationCache, ReplicationLagTracker, SaveOperationTag } from '@powersync/service-core';
1
+ import { mongo } from '@powersync/lib-service-mongodb';
2
+ import { container, DatabaseConnectionError, ErrorCode, ReplicationAbortedError, ReplicationAssertionError, ServiceError } from '@powersync/lib-services-framework';
3
+ import { PerformanceTracer, RelationCache, ReplicationLagTracker, SaveOperationTag } from '@powersync/service-core';
4
4
  import { ReplicationMetric } from '@powersync/service-types';
5
+ import { performance } from 'node:perf_hooks';
5
6
  import { MongoLSN } from '../common/MongoLSN.js';
6
7
  import { PostImagesOption } from '../types/types.js';
7
8
  import { escapeRegExp } from '../utils.js';
8
- import { trackChangeStreamBsonBytes } from './internal-mongodb-utils.js';
9
- import { constructAfterRecord, createCheckpoint, getCacheIdentifier, getMongoRelation, STANDALONE_CHECKPOINT_ID } from './MongoRelation.js';
10
- import { ChunkedSnapshotQuery } from './MongoSnapshotQuery.js';
9
+ import { createCheckpoint, getCacheIdentifier, getMongoRelation, STANDALONE_CHECKPOINT_ID } from './MongoRelation.js';
10
+ import { MongoSnapshotter } from './MongoSnapshotter.js';
11
+ import { parseChangeDocument, rawChangeStream } from './RawChangeStream.js';
11
12
  import { CHECKPOINTS_COLLECTION, timestampToDate } from './replication-utils.js';
13
+ import { DirectSourceRowConverter } from './SourceRowConverter.js';
12
14
  /**
13
15
  * Thrown when the change stream is not valid anymore, and replication
14
16
  * must be restarted.
@@ -32,13 +34,21 @@ export class ChangeStream {
32
34
  defaultDb;
33
35
  metrics;
34
36
  maxAwaitTimeMS;
35
- abort_signal;
37
+ abortController = new AbortController();
38
+ abortSignal = this.abortController.signal;
39
+ initPromise = null;
40
+ snapshotter;
41
+ /**
42
+ * We use the relationCache _only_ for caching static SourceTable info, not for snapshot status.
43
+ */
36
44
  relationCache = new RelationCache(getCacheIdentifier);
37
45
  replicationLag = new ReplicationLagTracker();
38
46
  checkpointStreamId = new mongo.ObjectId();
39
47
  logger;
40
48
  snapshotChunkLength;
41
49
  changeStreamTimeout;
50
+ storageHooks;
51
+ sourceRowConverter;
42
52
  constructor(options) {
43
53
  this.storage = options.storage;
44
54
  this.metrics = options.metrics;
@@ -46,22 +56,32 @@ export class ChangeStream {
46
56
  this.connections = options.connections;
47
57
  this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 10_000;
48
58
  this.snapshotChunkLength = options.snapshotChunkLength ?? 6_000;
59
+ this.storageHooks = options.storageHooks;
49
60
  this.client = this.connections.client;
50
61
  this.defaultDb = this.connections.db;
51
62
  this.sync_rules = options.storage.getParsedSyncRules({
52
63
  defaultSchema: this.defaultDb.databaseName
53
64
  });
65
+ this.sourceRowConverter = new DirectSourceRowConverter(this.sync_rules.compatibility);
54
66
  // The change stream aggregation command should timeout before the socket times out,
55
67
  // so we use 90% of the socket timeout value.
56
68
  this.changeStreamTimeout = Math.ceil(this.client.options.socketTimeoutMS * 0.9);
57
- this.abort_signal = options.abort_signal;
58
- this.abort_signal.addEventListener('abort', () => {
59
- // TODO: Fast abort?
69
+ this.logger = options.logger ?? this.storage.logger;
70
+ this.snapshotter = new MongoSnapshotter({
71
+ ...options,
72
+ abortSignal: this.abortSignal,
73
+ logger: this.logger,
74
+ checkpointStreamId: this.checkpointStreamId
75
+ });
76
+ options.abort_signal.addEventListener('abort', () => {
77
+ this.abortController.abort(options.abort_signal.reason);
60
78
  }, { once: true });
61
- this.logger = options.logger ?? defaultLogger;
79
+ if (options.abort_signal.aborted) {
80
+ this.abortController.abort(options.abort_signal.reason);
81
+ }
62
82
  }
63
83
  get stopped() {
64
- return this.abort_signal.aborted;
84
+ return this.abortSignal.aborted;
65
85
  }
66
86
  get usePostImages() {
67
87
  return this.connections.options.postImages != PostImagesOption.OFF;
@@ -69,220 +89,6 @@ export class ChangeStream {
69
89
  get configurePostImages() {
70
90
  return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE;
71
91
  }
72
- /**
73
- * This resolves a pattern, persists the related metadata, and returns
74
- * the resulting SourceTables.
75
- *
76
- * This implicitly checks the collection postImage configuration.
77
- */
78
- async resolveQualifiedTableNames(batch, tablePattern) {
79
- const schema = tablePattern.schema;
80
- if (tablePattern.connectionTag != this.connections.connectionTag) {
81
- return [];
82
- }
83
- let nameFilter;
84
- if (tablePattern.isWildcard) {
85
- nameFilter = new RegExp('^' + escapeRegExp(tablePattern.tablePrefix));
86
- }
87
- else {
88
- nameFilter = tablePattern.name;
89
- }
90
- let result = [];
91
- // Check if the collection exists
92
- const collections = await this.client
93
- .db(schema)
94
- .listCollections({
95
- name: nameFilter
96
- }, { nameOnly: false })
97
- .toArray();
98
- if (!tablePattern.isWildcard && collections.length == 0) {
99
- this.logger.warn(`Collection ${schema}.${tablePattern.name} not found`);
100
- }
101
- for (let collection of collections) {
102
- const table = await this.handleRelation(batch, getMongoRelation({ db: schema, coll: collection.name }),
103
- // This is done as part of the initial setup - snapshot is handled elsewhere
104
- { snapshot: false, collectionInfo: collection });
105
- result.push(table);
106
- }
107
- return result;
108
- }
109
- async initSlot() {
110
- const status = await this.storage.getStatus();
111
- if (status.snapshot_done && status.checkpoint_lsn) {
112
- this.logger.info(`Initial replication already done`);
113
- return { needsInitialSync: false, snapshotLsn: null };
114
- }
115
- return { needsInitialSync: true, snapshotLsn: status.snapshot_lsn };
116
- }
117
- async estimatedCount(table) {
118
- const count = await this.estimatedCountNumber(table);
119
- return `~${count}`;
120
- }
121
- async estimatedCountNumber(table) {
122
- const db = this.client.db(table.schema);
123
- return await db.collection(table.name).estimatedDocumentCount();
124
- }
125
- /**
126
- * This gets a LSN before starting a snapshot, which we can resume streaming from after the snapshot.
127
- *
128
- * This LSN can survive initial replication restarts.
129
- */
130
- async getSnapshotLsn() {
131
- const hello = await this.defaultDb.command({ hello: 1 });
132
- // Basic sanity check
133
- if (hello.msg == 'isdbgrid') {
134
- throw new ServiceError(ErrorCode.PSYNC_S1341, 'Sharded MongoDB Clusters are not supported yet (including MongoDB Serverless instances).');
135
- }
136
- else if (hello.setName == null) {
137
- throw new ServiceError(ErrorCode.PSYNC_S1342, 'Standalone MongoDB instances are not supported - use a replicaset.');
138
- }
139
- // Open a change stream just to get a resume token for later use.
140
- // We could use clusterTime from the hello command, but that won't tell us if the
141
- // snapshot isn't valid anymore.
142
- // If we just use the first resumeToken from the stream, we get two potential issues:
143
- // 1. The resumeToken may just be a wrapped clusterTime, which does not detect changes
144
- // in source db or other stream issues.
145
- // 2. The first actual change we get may have the same clusterTime, causing us to incorrect
146
- // skip that event.
147
- // Instead, we create a new checkpoint document, and wait until we get that document back in the stream.
148
- // To avoid potential race conditions with the checkpoint creation, we create a new checkpoint document
149
- // periodically until the timeout is reached.
150
- const LSN_TIMEOUT_SECONDS = 60;
151
- const LSN_CREATE_INTERVAL_SECONDS = 1;
152
- // Create a checkpoint, and open a change stream using startAtOperationTime with the checkpoint's operationTime.
153
- const firstCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
154
- await using streamManager = this.openChangeStream({ lsn: firstCheckpointLsn, maxAwaitTimeMs: 0 });
155
- const { stream } = streamManager;
156
- const startTime = performance.now();
157
- let lastCheckpointCreated = performance.now();
158
- let eventsSeen = 0;
159
- while (performance.now() - startTime < LSN_TIMEOUT_SECONDS * 1000) {
160
- if (performance.now() - lastCheckpointCreated >= LSN_CREATE_INTERVAL_SECONDS * 1000) {
161
- await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
162
- lastCheckpointCreated = performance.now();
163
- }
164
- // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream
165
- const changeDocument = await stream.tryNext().catch((e) => {
166
- throw mapChangeStreamError(e);
167
- });
168
- if (changeDocument == null) {
169
- continue;
170
- }
171
- const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
172
- if (ns?.coll == CHECKPOINTS_COLLECTION && 'documentKey' in changeDocument) {
173
- const checkpointId = changeDocument.documentKey._id;
174
- if (!this.checkpointStreamId.equals(checkpointId)) {
175
- continue;
176
- }
177
- const { comparable: lsn } = new MongoLSN({
178
- timestamp: changeDocument.clusterTime,
179
- resume_token: changeDocument._id
180
- });
181
- return lsn;
182
- }
183
- eventsSeen += 1;
184
- }
185
- // Could happen if there is a very large replication lag?
186
- throw new ServiceError(ErrorCode.PSYNC_S1301, `Timeout after while waiting for checkpoint document for ${LSN_TIMEOUT_SECONDS}s. Streamed events = ${eventsSeen}`);
187
- }
188
- /**
189
- * Given a snapshot LSN, validate that we can read from it, by opening a change stream.
190
- */
191
- async validateSnapshotLsn(lsn) {
192
- await using streamManager = this.openChangeStream({ lsn: lsn, maxAwaitTimeMs: 0 });
193
- const { stream } = streamManager;
194
- try {
195
- // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream
196
- await stream.tryNext();
197
- }
198
- catch (e) {
199
- // Note: A timeout here is not handled as a ChangeStreamInvalidatedError, even though
200
- // we possibly cannot recover from it.
201
- throw mapChangeStreamError(e);
202
- }
203
- }
204
- async initialReplication(snapshotLsn) {
205
- const sourceTables = this.sync_rules.getSourceTables();
206
- await this.client.connect();
207
- const flushResult = await this.storage.startBatch({
208
- logger: this.logger,
209
- zeroLSN: MongoLSN.ZERO.comparable,
210
- defaultSchema: this.defaultDb.databaseName,
211
- storeCurrentData: false,
212
- skipExistingRows: true
213
- }, async (batch) => {
214
- if (snapshotLsn == null) {
215
- // First replication attempt - get a snapshot and store the timestamp
216
- snapshotLsn = await this.getSnapshotLsn();
217
- await batch.setResumeLsn(snapshotLsn);
218
- this.logger.info(`Marking snapshot at ${snapshotLsn}`);
219
- }
220
- else {
221
- this.logger.info(`Resuming snapshot at ${snapshotLsn}`);
222
- // Check that the snapshot is still valid.
223
- await this.validateSnapshotLsn(snapshotLsn);
224
- }
225
- // Start by resolving all tables.
226
- // This checks postImage configuration, and that should fail as
227
- // early as possible.
228
- let allSourceTables = [];
229
- for (let tablePattern of sourceTables) {
230
- const tables = await this.resolveQualifiedTableNames(batch, tablePattern);
231
- allSourceTables.push(...tables);
232
- }
233
- let tablesWithStatus = [];
234
- for (let table of allSourceTables) {
235
- if (table.snapshotComplete) {
236
- this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`);
237
- continue;
238
- }
239
- let count = await this.estimatedCountNumber(table);
240
- const updated = await batch.updateTableProgress(table, {
241
- totalEstimatedCount: count
242
- });
243
- tablesWithStatus.push(updated);
244
- this.relationCache.update(updated);
245
- this.logger.info(`To replicate: ${table.qualifiedName}: ${updated.snapshotStatus?.replicatedCount}/~${updated.snapshotStatus?.totalEstimatedCount}`);
246
- }
247
- for (let table of tablesWithStatus) {
248
- await this.snapshotTable(batch, table);
249
- await batch.markTableSnapshotDone([table]);
250
- this.touch();
251
- }
252
- // The checkpoint here is a marker - we need to replicate up to at least this
253
- // point before the data can be considered consistent.
254
- // We could do this for each individual table, but may as well just do it once for the entire snapshot.
255
- const checkpoint = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);
256
- await batch.markAllSnapshotDone(checkpoint);
257
- // This will not create a consistent checkpoint yet, but will persist the op.
258
- // Actual checkpoint will be created when streaming replication caught up.
259
- await batch.commit(snapshotLsn);
260
- this.logger.info(`Snapshot done. Need to replicate from ${snapshotLsn} to ${checkpoint} to be consistent`);
261
- });
262
- return { lastOpId: flushResult?.flushed_op };
263
- }
264
- async setupCheckpointsCollection() {
265
- const collection = await this.getCollectionInfo(this.defaultDb.databaseName, CHECKPOINTS_COLLECTION);
266
- if (collection == null) {
267
- await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, {
268
- changeStreamPreAndPostImages: { enabled: true }
269
- });
270
- }
271
- else if (this.usePostImages && collection.options?.changeStreamPreAndPostImages?.enabled != true) {
272
- // Drop + create requires less permissions than collMod,
273
- // and we don't care about the data in this collection.
274
- await this.defaultDb.dropCollection(CHECKPOINTS_COLLECTION);
275
- await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, {
276
- changeStreamPreAndPostImages: { enabled: true }
277
- });
278
- }
279
- else {
280
- // Clear the collection on startup, to keep it clean
281
- // We never query this collection directly, and don't want to keep the data around.
282
- // We only use this to get data into the oplog/changestream.
283
- await this.defaultDb.collection(CHECKPOINTS_COLLECTION).deleteMany({});
284
- }
285
- }
286
92
  getSourceNamespaceFilters() {
287
93
  const sourceTables = this.sync_rules.getSourceTables();
288
94
  let $inFilters = [
@@ -330,78 +136,10 @@ export class ChangeStream {
330
136
  }
331
137
  return { $match: nsFilter, multipleDatabases };
332
138
  }
333
- static *getQueryData(results) {
334
- for (let row of results) {
335
- yield constructAfterRecord(row);
336
- }
337
- }
338
- async snapshotTable(batch, table) {
339
- const rowsReplicatedMetric = this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED);
340
- const bytesReplicatedMetric = this.metrics.getCounter(ReplicationMetric.DATA_REPLICATED_BYTES);
341
- const chunksReplicatedMetric = this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED);
342
- const totalEstimatedCount = await this.estimatedCountNumber(table);
343
- let at = table.snapshotStatus?.replicatedCount ?? 0;
344
- const db = this.client.db(table.schema);
345
- const collection = db.collection(table.name);
346
- await using query = new ChunkedSnapshotQuery({
347
- collection,
348
- key: table.snapshotStatus?.lastKey,
349
- batchSize: this.snapshotChunkLength
350
- });
351
- if (query.lastKey != null) {
352
- this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming at _id > ${query.lastKey}`);
353
- }
354
- else {
355
- this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
356
- }
357
- let lastBatch = performance.now();
358
- let nextChunkPromise = query.nextChunk();
359
- while (true) {
360
- const { docs: docBatch, lastKey, bytes: chunkBytes } = await nextChunkPromise;
361
- if (docBatch.length == 0) {
362
- // No more data - stop iterating
363
- break;
364
- }
365
- bytesReplicatedMetric.add(chunkBytes);
366
- chunksReplicatedMetric.add(1);
367
- if (this.abort_signal.aborted) {
368
- throw new ReplicationAbortedError(`Aborted initial replication`, this.abort_signal.reason);
369
- }
370
- // Pre-fetch next batch, so that we can read and write concurrently
371
- nextChunkPromise = query.nextChunk();
372
- for (let document of docBatch) {
373
- const record = this.constructAfterRecord(document);
374
- // This auto-flushes when the batch reaches its size limit
375
- await batch.save({
376
- tag: SaveOperationTag.INSERT,
377
- sourceTable: table,
378
- before: undefined,
379
- beforeReplicaId: undefined,
380
- after: record,
381
- afterReplicaId: document._id
382
- });
383
- }
384
- // Important: flush before marking progress
385
- await batch.flush();
386
- at += docBatch.length;
387
- rowsReplicatedMetric.add(docBatch.length);
388
- table = await batch.updateTableProgress(table, {
389
- lastKey,
390
- replicatedCount: at,
391
- totalEstimatedCount: totalEstimatedCount
392
- });
393
- this.relationCache.update(table);
394
- const duration = performance.now() - lastBatch;
395
- lastBatch = performance.now();
396
- this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} in ${duration.toFixed(0)}ms`);
397
- this.touch();
398
- }
399
- // In case the loop was interrupted, make sure we await the last promise.
400
- await nextChunkPromise;
401
- }
402
- async getRelation(batch, descriptor, options) {
403
- const existing = this.relationCache.get(descriptor);
139
+ async getRelations(batch, descriptor, options) {
140
+ const existing = this.relationCache.getAll(descriptor);
404
141
  if (existing != null) {
142
+ // We do this even when it's an empty result: Empty means nothing to sync, and we don't need to re-resolve.
405
143
  return existing;
406
144
  }
407
145
  // Note: collection may have been dropped at this point, so we handle
@@ -444,14 +182,11 @@ export class ChangeStream {
444
182
  // Ignore the postImages check in this case.
445
183
  }
446
184
  const snapshot = options.snapshot;
447
- const result = await this.storage.resolveTable({
448
- group_id: this.group_id,
185
+ const result = await batch.resolveTables({
449
186
  connection_id: this.connection_id,
450
- connection_tag: this.connections.connectionTag,
451
- entity_descriptor: descriptor,
452
- sync_rules: this.sync_rules
187
+ source: descriptor
453
188
  });
454
- this.relationCache.update(result.table);
189
+ this.relationCache.updateAll(descriptor, result.tables);
455
190
  // Drop conflicting collections.
456
191
  // This is generally not expected for MongoDB source dbs, so we log an error.
457
192
  if (result.dropTables.length > 0) {
@@ -461,37 +196,31 @@ export class ChangeStream {
461
196
  // Snapshot if:
462
197
  // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere)
463
198
  // 2. Snapshot is not already done, AND:
464
- // 3. The table is used in sync rules.
465
- const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny;
466
- if (shouldSnapshot) {
199
+ // 3. The table is used in sync config.
200
+ const snapshotCandidates = result.tables.filter((table) => snapshot && !table.snapshotComplete && table.syncAny);
201
+ if (snapshotCandidates.length > 0) {
467
202
  this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`);
468
- // Truncate this table, in case a previous snapshot was interrupted.
469
- await batch.truncate([result.table]);
470
- await this.snapshotTable(batch, result.table);
471
- const no_checkpoint_before_lsn = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);
472
- const [table] = await batch.markTableSnapshotDone([result.table], no_checkpoint_before_lsn);
473
- return table;
474
- }
475
- return result.table;
476
- }
477
- constructAfterRecord(document) {
478
- const inputRow = constructAfterRecord(document);
479
- return this.sync_rules.applyRowContext(inputRow);
203
+ await this.snapshotter.snapshotTables(batch, snapshotCandidates);
204
+ }
205
+ return result.tables;
480
206
  }
481
207
  async writeChange(batch, table, change) {
482
208
  if (!table.syncAny) {
483
- this.logger.debug(`Collection ${table.qualifiedName} not used in sync rules - skipping`);
209
+ this.logger.debug(`Collection ${table.qualifiedName} not used in sync config - skipping`);
484
210
  return null;
485
211
  }
486
212
  this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1);
487
213
  if (change.operationType == 'insert') {
488
- const baseRecord = this.constructAfterRecord(change.fullDocument);
214
+ const { row: baseRecord, replicaId: _replicaId } = this.rawToSqliteRow(change.fullDocument);
489
215
  return await batch.save({
490
216
  tag: SaveOperationTag.INSERT,
491
217
  sourceTable: table,
492
218
  before: undefined,
493
219
  beforeReplicaId: undefined,
494
220
  after: baseRecord,
221
+ // Same as _replicaId
222
+ // We specifically need to use the source _id, not the converted one in baseRecord,
223
+ // to preserve _id uniqueness properties.
495
224
  afterReplicaId: change.documentKey._id
496
225
  });
497
226
  }
@@ -505,14 +234,14 @@ export class ChangeStream {
505
234
  beforeReplicaId: change.documentKey._id
506
235
  });
507
236
  }
508
- const after = this.constructAfterRecord(change.fullDocument);
237
+ const { row: after, replicaId: _replicaId } = this.rawToSqliteRow(change.fullDocument);
509
238
  return await batch.save({
510
239
  tag: SaveOperationTag.UPDATE,
511
240
  sourceTable: table,
512
241
  before: undefined,
513
242
  beforeReplicaId: undefined,
514
243
  after: after,
515
- afterReplicaId: change.documentKey._id
244
+ afterReplicaId: change.documentKey._id // Same as _replicaId
516
245
  });
517
246
  }
518
247
  else if (change.operationType == 'delete') {
@@ -528,34 +257,60 @@ export class ChangeStream {
528
257
  }
529
258
  }
530
259
  async replicate() {
260
+ let streamPromise = null;
261
+ let loopPromise = null;
531
262
  try {
532
263
  // If anything errors here, the entire replication process is halted, and
533
264
  // all connections automatically closed, including this one.
534
- await this.initReplication();
535
- await this.streamChanges();
265
+ this.initPromise = this.initReplication();
266
+ await this.initPromise;
267
+ loopPromise = this.snapshotter
268
+ .replicationLoop()
269
+ .then(() => {
270
+ throw new ReplicationAssertionError(`Replication snapshotter exited unexpectedly`);
271
+ })
272
+ .catch((e) => {
273
+ this.abortController.abort(e);
274
+ throw e;
275
+ });
276
+ if (!this.snapshotter.supportsConcurrentSnapshots) {
277
+ await Promise.race([this.snapshotter.waitForInitialSnapshot(), loopPromise]);
278
+ }
279
+ streamPromise = this.streamChanges()
280
+ .then(() => {
281
+ throw new ReplicationAssertionError(`Replication stream exited unexpectedly`);
282
+ })
283
+ .catch((e) => {
284
+ this.abortController.abort(e);
285
+ throw e;
286
+ });
287
+ const results = await Promise.allSettled([loopPromise, streamPromise]);
288
+ throw replicationLoopError(results);
536
289
  }
537
290
  catch (e) {
538
291
  await this.storage.reportError(e);
539
292
  throw e;
540
293
  }
294
+ finally {
295
+ this.abortController.abort();
296
+ }
297
+ }
298
+ async waitForInitialSnapshot() {
299
+ if (this.initPromise == null) {
300
+ throw new ReplicationAssertionError('replicate() must be called before waitForInitialSnapshot()');
301
+ }
302
+ await this.initPromise;
303
+ await this.snapshotter.waitForInitialSnapshot();
541
304
  }
542
305
  async initReplication() {
543
- const result = await this.initSlot();
544
- await this.setupCheckpointsCollection();
306
+ const result = await this.snapshotter.checkSlot();
307
+ await this.snapshotter.setupCheckpointsCollection();
545
308
  if (result.needsInitialSync) {
546
309
  if (result.snapshotLsn == null) {
547
310
  // Snapshot LSN is not present, so we need to start replication from scratch.
548
- await this.storage.clear({ signal: this.abort_signal });
549
- }
550
- const { lastOpId } = await this.initialReplication(result.snapshotLsn);
551
- if (lastOpId != null) {
552
- // Populate the cache _after_ initial replication, but _before_ we switch to this sync rules.
553
- await this.storage.populatePersistentChecksumCache({
554
- signal: this.abort_signal,
555
- // No checkpoint yet, but we do have the opId.
556
- maxOpId: lastOpId
557
- });
311
+ await this.storage.clear({ signal: this.abortSignal });
558
312
  }
313
+ await this.snapshotter.queueSnapshotTables(result.snapshotLsn);
559
314
  }
560
315
  }
561
316
  async streamChanges() {
@@ -571,17 +326,11 @@ export class ChangeStream {
571
326
  throw e;
572
327
  }
573
328
  }
574
- openChangeStream(options) {
329
+ rawChangeStreamBatches(options) {
575
330
  const lastLsn = options.lsn ? MongoLSN.fromSerialized(options.lsn) : null;
576
331
  const startAfter = lastLsn?.timestamp;
577
332
  const resumeAfter = lastLsn?.resumeToken;
578
- const filters = this.getSourceNamespaceFilters();
579
- const pipeline = [
580
- {
581
- $match: filters.$match
582
- },
583
- { $changeStreamSplitLargeEvent: {} }
584
- ];
333
+ const filters = options.filters;
585
334
  let fullDocument;
586
335
  if (this.usePostImages) {
587
336
  // 'read_only' or 'auto_configure'
@@ -594,10 +343,17 @@ export class ChangeStream {
594
343
  }
595
344
  const streamOptions = {
596
345
  showExpandedEvents: true,
597
- maxAwaitTimeMS: options.maxAwaitTimeMs ?? this.maxAwaitTimeMS,
598
- fullDocument: fullDocument,
599
- maxTimeMS: this.changeStreamTimeout
346
+ fullDocument: fullDocument
600
347
  };
348
+ const pipeline = [
349
+ {
350
+ $changeStream: streamOptions
351
+ },
352
+ {
353
+ $match: filters.$match
354
+ },
355
+ { $changeStreamSplitLargeEvent: {} }
356
+ ];
601
357
  /**
602
358
  * Only one of these options can be supplied at a time.
603
359
  */
@@ -610,48 +366,39 @@ export class ChangeStream {
610
366
  // This is also relevant for getSnapshotLSN().
611
367
  streamOptions.startAtOperationTime = startAfter;
612
368
  }
613
- let stream;
369
+ let watchDb;
614
370
  if (filters.multipleDatabases) {
615
- // Requires readAnyDatabase@admin on Atlas
616
- stream = this.client.watch(pipeline, streamOptions);
371
+ watchDb = this.client.db('admin');
372
+ streamOptions.allChangesForCluster = true;
617
373
  }
618
374
  else {
619
- // Same general result, but requires less permissions than the above
620
- stream = this.defaultDb.watch(pipeline, streamOptions);
375
+ watchDb = this.defaultDb;
621
376
  }
622
- this.abort_signal.addEventListener('abort', () => {
623
- stream.close();
377
+ return rawChangeStream(watchDb, pipeline, {
378
+ batchSize: options.batchSize ?? this.snapshotChunkLength,
379
+ maxAwaitTimeMS: options.maxAwaitTimeMS ?? this.maxAwaitTimeMS,
380
+ maxTimeMS: this.changeStreamTimeout,
381
+ signal: options.signal,
382
+ logger: this.logger,
383
+ tracer: options.tracer
624
384
  });
625
- return {
626
- stream,
627
- filters,
628
- [Symbol.asyncDispose]: async () => {
629
- return stream.close();
630
- }
631
- };
632
385
  }
633
- getBufferedChangeCount(stream) {
634
- // The driver keeps fetched change stream documents on the underlying cursor, but does
635
- // not expose that through the public ChangeStream API. We use this to detect backlog
636
- // building up before we have processed the corresponding source changes locally.
637
- // If the driver API changes, we'll have a hard error here.
638
- // We specifically want to avoid a silent performance regression if the driver behavior changes.
639
- const cursor = stream.cursor;
640
- if (cursor == null || typeof cursor.bufferedCount != 'function') {
641
- throw new ReplicationAssertionError('MongoDB ChangeStream no longer exposes an internal cursor with bufferedCount');
642
- }
643
- return cursor.bufferedCount();
386
+ rawToSqliteRow(row) {
387
+ return this.sourceRowConverter.rawToSqliteRow(row);
644
388
  }
645
389
  async streamChangesInternal() {
646
390
  const transactionsReplicatedMetric = this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED);
647
391
  const bytesReplicatedMetric = this.metrics.getCounter(ReplicationMetric.DATA_REPLICATED_BYTES);
648
392
  const chunksReplicatedMetric = this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED);
393
+ const tracer = new PerformanceTracer('MongoDB streaming replication');
649
394
  await this.storage.startBatch({
650
395
  logger: this.logger,
651
396
  zeroLSN: MongoLSN.ZERO.comparable,
652
397
  defaultSchema: this.defaultDb.databaseName,
653
398
  // We get a complete postimage for every change, so we don't need to store the current data.
654
- storeCurrentData: false
399
+ storeCurrentData: false,
400
+ hooks: this.storageHooks,
401
+ tracer
655
402
  }, async (batch) => {
656
403
  const { resumeFromLsn } = batch;
657
404
  if (resumeFromLsn == null) {
@@ -659,20 +406,18 @@ export class ChangeStream {
659
406
  }
660
407
  const lastLsn = MongoLSN.fromSerialized(resumeFromLsn);
661
408
  const startAfter = lastLsn?.timestamp;
409
+ let outerSpan = tracer.span('batch');
662
410
  // It is normal for this to be a minute or two old when there is a low volume
663
411
  // of ChangeStream events.
664
412
  const tokenAgeSeconds = Math.round((Date.now() - timestampToDate(startAfter).getTime()) / 1000);
665
413
  this.logger.info(`Resume streaming at ${startAfter?.inspect()} / ${lastLsn} | Token age: ${tokenAgeSeconds}s`);
666
- await using streamManager = this.openChangeStream({ lsn: resumeFromLsn });
667
- const { stream, filters } = streamManager;
668
- if (this.abort_signal.aborted) {
669
- await stream.close();
670
- return;
671
- }
672
- trackChangeStreamBsonBytes(stream, (bytes) => {
673
- bytesReplicatedMetric.add(bytes);
674
- // Each of these represent a single response message from MongoDB.
675
- chunksReplicatedMetric.add(1);
414
+ const filters = this.getSourceNamespaceFilters();
415
+ // This is closed when the for loop below returns/breaks/throws
416
+ const batchStream = this.rawChangeStreamBatches({
417
+ lsn: resumeFromLsn,
418
+ filters,
419
+ signal: this.abortSignal,
420
+ tracer
676
421
  });
677
422
  // Always start with a checkpoint.
678
423
  // This helps us to clear errors when restarting, even if there is
@@ -680,36 +425,24 @@ export class ChangeStream {
680
425
  let waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
681
426
  let splitDocument = null;
682
427
  let flexDbNameWorkaroundLogged = false;
683
- let changesSinceLastCheckpoint = 0;
684
428
  let lastEmptyResume = performance.now();
685
429
  let lastTxnKey = null;
686
- while (true) {
687
- if (this.abort_signal.aborted) {
688
- break;
689
- }
690
- const originalChangeDocument = await stream.tryNext().catch((e) => {
691
- throw mapChangeStreamError(e);
692
- });
693
- // The stream was closed, we will only ever receive `null` from it
694
- if (!originalChangeDocument && stream.closed) {
695
- break;
696
- }
697
- if (this.abort_signal.aborted) {
430
+ for await (let eventBatch of batchStream) {
431
+ const { events, resumeToken } = eventBatch;
432
+ using batchSpan = tracer.span('processing');
433
+ bytesReplicatedMetric.add(eventBatch.byteSize);
434
+ chunksReplicatedMetric.add(1);
435
+ if (this.abortSignal.aborted) {
698
436
  break;
699
437
  }
700
- if (originalChangeDocument == null) {
701
- // We get a new null document after `maxAwaitTimeMS` if there were no other events.
702
- // In this case, stream.resumeToken is the resume token associated with the last response.
703
- // stream.resumeToken is not updated if stream.tryNext() returns data, while stream.next()
704
- // does update it.
705
- // From observed behavior, the actual resumeToken changes around once every 10 seconds.
438
+ this.touch();
439
+ if (events.length == 0) {
440
+ // No changes in this batch, but we still want to keep the connection alive.
441
+ // We do this by persisting a keepalive checkpoint.
706
442
  // If we don't update it on empty events, we do keep consistency, but resuming the stream
707
443
  // with old tokens may cause connection timeouts.
708
- // We throttle this further by only persisting a keepalive once a minute.
709
- // We add an additional check for waitForCheckpointLsn == null, to make sure we're not
710
- // doing a keepalive in the middle of a transaction.
711
444
  if (waitForCheckpointLsn == null && performance.now() - lastEmptyResume > 60_000) {
712
- const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(stream.resumeToken);
445
+ const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(resumeToken);
713
446
  await batch.keepalive(lsn);
714
447
  this.touch();
715
448
  lastEmptyResume = performance.now();
@@ -718,199 +451,230 @@ export class ChangeStream {
718
451
  this.logger.info(`Idle change stream. Persisted resumeToken for ${timestampToDate(timestamp).toISOString()}`);
719
452
  this.replicationLag.markStarted();
720
453
  }
721
- continue;
454
+ // If we have no changes, we can just persist the keepalive.
455
+ // This is throttled to once per minute.
456
+ if (performance.now() - lastEmptyResume < 60_000) {
457
+ continue;
458
+ }
722
459
  }
723
460
  this.touch();
724
- if (startAfter != null && originalChangeDocument.clusterTime?.lte(startAfter)) {
725
- continue;
726
- }
727
- let changeDocument = originalChangeDocument;
728
- if (originalChangeDocument?.splitEvent != null) {
729
- // Handle split events from $changeStreamSplitLargeEvent.
730
- // This is only relevant for very large update operations.
731
- const splitEvent = originalChangeDocument?.splitEvent;
732
- if (splitDocument == null) {
733
- splitDocument = originalChangeDocument;
734
- }
735
- else {
736
- splitDocument = Object.assign(splitDocument, originalChangeDocument);
737
- }
738
- if (splitEvent.fragment == splitEvent.of) {
739
- // Got all fragments
740
- changeDocument = splitDocument;
741
- splitDocument = null;
461
+ for (let eventIndex = 0; eventIndex < events.length; eventIndex++) {
462
+ const rawChangeDocument = events[eventIndex];
463
+ const originalChangeDocument = parseChangeDocument(rawChangeDocument);
464
+ if (this.abortSignal.aborted) {
465
+ break;
742
466
  }
743
- else {
744
- // Wait for more fragments
467
+ if (startAfter != null && originalChangeDocument.clusterTime?.lte(startAfter)) {
745
468
  continue;
746
469
  }
747
- }
748
- else if (splitDocument != null) {
749
- // We were waiting for fragments, but got a different event
750
- throw new ReplicationAssertionError(`Incomplete splitEvent: ${JSON.stringify(splitDocument.splitEvent)}`);
751
- }
752
- if (!filters.multipleDatabases &&
753
- 'ns' in changeDocument &&
754
- changeDocument.ns.db != this.defaultDb.databaseName &&
755
- changeDocument.ns.db.endsWith(`_${this.defaultDb.databaseName}`)) {
756
- // When all of the following conditions are met:
757
- // 1. We're replicating from an Atlas Flex instance.
758
- // 2. There were changestream events recorded while the PowerSync service is paused.
759
- // 3. We're only replicating from a single database.
760
- // Then we've observed an ns with for example {db: '67b83e86cd20730f1e766dde_ps'},
761
- // instead of the expected {db: 'ps'}.
762
- // We correct this.
763
- changeDocument.ns.db = this.defaultDb.databaseName;
764
- if (!flexDbNameWorkaroundLogged) {
765
- flexDbNameWorkaroundLogged = true;
766
- this.logger.warn(`Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.`);
470
+ let changeDocument = originalChangeDocument;
471
+ if (originalChangeDocument?.splitEvent != null) {
472
+ // Handle split events from $changeStreamSplitLargeEvent.
473
+ // This is only relevant for very large update operations.
474
+ const splitEvent = originalChangeDocument?.splitEvent;
475
+ if (splitDocument == null) {
476
+ splitDocument = originalChangeDocument;
477
+ }
478
+ else {
479
+ splitDocument = Object.assign(splitDocument, originalChangeDocument);
480
+ }
481
+ if (splitEvent.fragment == splitEvent.of) {
482
+ // Got all fragments
483
+ changeDocument = splitDocument;
484
+ splitDocument = null;
485
+ }
486
+ else {
487
+ // Wait for more fragments
488
+ continue;
489
+ }
767
490
  }
768
- }
769
- const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
770
- if (ns?.coll == CHECKPOINTS_COLLECTION) {
771
- /**
772
- * Dropping the database does not provide an `invalidate` event.
773
- * We typically would receive `drop` events for the collection which we
774
- * would process below.
775
- *
776
- * However we don't commit the LSN after collections are dropped.
777
- * The prevents the `startAfter` or `resumeToken` from advancing past the drop events.
778
- * The stream also closes after the drop events.
779
- * This causes an infinite loop of processing the collection drop events.
780
- *
781
- * This check here invalidates the change stream if our `_checkpoints` collection
782
- * is dropped. This allows for detecting when the DB is dropped.
783
- */
784
- if (changeDocument.operationType == 'drop') {
785
- throw new ChangeStreamInvalidatedError('Internal collections have been dropped', new Error('_checkpoints collection was dropped'));
491
+ else if (splitDocument != null) {
492
+ // We were waiting for fragments, but got a different event
493
+ throw new ReplicationAssertionError(`Incomplete splitEvent: ${JSON.stringify(splitDocument.splitEvent)}`);
786
494
  }
787
- if (!(changeDocument.operationType == 'insert' ||
788
- changeDocument.operationType == 'update' ||
789
- changeDocument.operationType == 'replace')) {
790
- continue;
495
+ if (!filters.multipleDatabases &&
496
+ 'ns' in changeDocument &&
497
+ changeDocument.ns.db != this.defaultDb.databaseName &&
498
+ changeDocument.ns.db.endsWith(`_${this.defaultDb.databaseName}`)) {
499
+ // When all of the following conditions are met:
500
+ // 1. We're replicating from an Atlas Flex instance.
501
+ // 2. There were changestream events recorded while the PowerSync service is paused.
502
+ // 3. We're only replicating from a single database.
503
+ // Then we've observed an ns with for example {db: '67b83e86cd20730f1e766dde_ps'},
504
+ // instead of the expected {db: 'ps'}.
505
+ // We correct this.
506
+ changeDocument.ns.db = this.defaultDb.databaseName;
507
+ if (!flexDbNameWorkaroundLogged) {
508
+ flexDbNameWorkaroundLogged = true;
509
+ this.logger.warn(`Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.`);
510
+ }
791
511
  }
792
- // We handle two types of checkpoint events:
793
- // 1. "Standalone" checkpoints, typically write checkpoints. We want to process these
794
- // immediately, regardless of where they were created.
795
- // 2. "Batch" checkpoints for the current stream. This is used as a form of dynamic rate
796
- // limiting of commits, so we specifically want to exclude checkpoints from other streams.
797
- //
798
- // It may be useful to also throttle commits due to standalone checkpoints in the future.
799
- // However, these typically have a much lower rate than batch checkpoints, so we don't do that for now.
800
- const checkpointId = changeDocument.documentKey._id;
801
- if (checkpointId == STANDALONE_CHECKPOINT_ID) {
802
- // Standalone / write checkpoint received.
803
- // When we are caught up, commit immediately to keep write checkpoint latency low.
804
- // Once there is already a batch checkpoint pending, or the driver has buffered more
805
- // change stream events, collapse standalone checkpoints into the normal batch
806
- // checkpoint flow to avoid commit churn under sustained load.
807
- if (waitForCheckpointLsn != null || this.getBufferedChangeCount(stream) > 0) {
808
- if (waitForCheckpointLsn == null) {
512
+ const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
513
+ if (ns?.coll == CHECKPOINTS_COLLECTION) {
514
+ /**
515
+ * Dropping the database does not provide an `invalidate` event.
516
+ * We typically would receive `drop` events for the collection which we
517
+ * would process below.
518
+ *
519
+ * However we don't commit the LSN after collections are dropped.
520
+ * This prevents the `startAfter` or `resumeToken` from advancing past the drop events.
521
+ * The stream also closes after the drop events.
522
+ * This causes an infinite loop of processing the collection drop events.
523
+ *
524
+ * This check here invalidates the change stream if our `_powersync_checkpoints` collection
525
+ * is dropped. This allows for detecting when the DB is dropped.
526
+ */
527
+ if (changeDocument.operationType == 'drop') {
528
+ throw new ChangeStreamInvalidatedError('Internal collections have been dropped', new Error('_powersync_checkpoints collection was dropped'));
529
+ }
530
+ if (!(changeDocument.operationType == 'insert' ||
531
+ changeDocument.operationType == 'update' ||
532
+ changeDocument.operationType == 'replace')) {
533
+ continue;
534
+ }
535
+ // We handle two types of checkpoint events:
536
+ // 1. "Standalone" checkpoints, typically write checkpoints. We want to process these
537
+ // immediately, regardless of where they were created.
538
+ // 2. "Batch" checkpoints for the current stream. This is used as a form of dynamic rate
539
+ // limiting of commits, so we specifically want to exclude checkpoints from other streams.
540
+ //
541
+ // It may be useful to also throttle commits due to standalone checkpoints in the future.
542
+ // However, these typically have a much lower rate than batch checkpoints, so we don't do that for now.
543
+ const checkpointId = changeDocument.documentKey._id;
544
+ if (checkpointId == STANDALONE_CHECKPOINT_ID) {
545
+ // Standalone / write checkpoint received.
546
+ // When we are caught up, commit immediately to keep write checkpoint latency low.
547
+ // Once there is already a batch checkpoint pending, or the driver has buffered more
548
+ // change stream events, collapse standalone checkpoints into the normal batch
549
+ // checkpoint flow to avoid commit churn under sustained load.
550
+ const hasBufferedChanges = eventIndex < events.length - 1;
551
+ if (hasBufferedChanges && waitForCheckpointLsn == null) {
552
+ // Buffered changes - create a new batch checkpoint to rate limit commits
553
+ using _ = tracer.span('source_checkpoint');
809
554
  waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
555
+ continue;
556
+ }
557
+ else if (waitForCheckpointLsn != null) {
558
+ // Skip this checkpoint - wait for the batch checkpoint.
559
+ continue;
560
+ }
561
+ else {
562
+ // No buffered changes, and no batch checkpoint pending - commit immediately.
810
563
  }
564
+ }
565
+ else if (!this.checkpointStreamId.equals(checkpointId)) {
811
566
  continue;
812
567
  }
568
+ const { comparable: lsn } = new MongoLSN({
569
+ timestamp: changeDocument.clusterTime,
570
+ resume_token: changeDocument._id
571
+ });
572
+ if (batch.lastCheckpointLsn != null && lsn < batch.lastCheckpointLsn) {
573
+ // Checkpoint out of order - should never happen with MongoDB.
574
+ // If it does happen, we throw an error to stop the replication - restarting should recover.
575
+ // Since we use batch.lastCheckpointLsn for the next resumeAfter, this should not result in an infinite loop.
576
+ // Originally a workaround for https://jira.mongodb.org/browse/NODE-7042.
577
+ // This has been fixed in the driver in the meantime, but we still keep this as a safety-check.
578
+ throw new ReplicationAssertionError(`Change resumeToken ${changeDocument._id._data} (${timestampToDate(changeDocument.clusterTime).toISOString()}) is less than last checkpoint LSN ${batch.lastCheckpointLsn}. Restarting replication.`);
579
+ }
580
+ if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) {
581
+ waitForCheckpointLsn = null;
582
+ }
583
+ const { checkpointBlocked } = await batch.commit(lsn, {
584
+ oldestUncommittedChange: this.replicationLag.oldestUncommittedChange
585
+ });
586
+ if (!checkpointBlocked) {
587
+ this.replicationLag.markCommitted();
588
+ }
813
589
  }
814
- else if (!this.checkpointStreamId.equals(checkpointId)) {
815
- continue;
816
- }
817
- const { comparable: lsn } = new MongoLSN({
818
- timestamp: changeDocument.clusterTime,
819
- resume_token: changeDocument._id
820
- });
821
- if (batch.lastCheckpointLsn != null && lsn < batch.lastCheckpointLsn) {
822
- // Checkpoint out of order - should never happen with MongoDB.
823
- // If it does happen, we throw an error to stop the replication - restarting should recover.
824
- // Since we use batch.lastCheckpointLsn for the next resumeAfter, this should not result in an infinite loop.
825
- // Originally a workaround for https://jira.mongodb.org/browse/NODE-7042.
826
- // This has been fixed in the driver in the meantime, but we still keep this as a safety-check.
827
- throw new ReplicationAssertionError(`Change resumeToken ${changeDocument._id._data} (${timestampToDate(changeDocument.clusterTime).toISOString()}) is less than last checkpoint LSN ${batch.lastCheckpointLsn}. Restarting replication.`);
828
- }
829
- if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) {
830
- waitForCheckpointLsn = null;
831
- }
832
- const { checkpointBlocked } = await batch.commit(lsn, {
833
- oldestUncommittedChange: this.replicationLag.oldestUncommittedChange
834
- });
835
- if (!checkpointBlocked) {
836
- this.replicationLag.markCommitted();
837
- changesSinceLastCheckpoint = 0;
838
- }
839
- }
840
- else if (changeDocument.operationType == 'insert' ||
841
- changeDocument.operationType == 'update' ||
842
- changeDocument.operationType == 'replace' ||
843
- changeDocument.operationType == 'delete') {
844
- if (waitForCheckpointLsn == null) {
845
- waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
846
- }
847
- const rel = getMongoRelation(changeDocument.ns);
848
- const table = await this.getRelation(batch, rel, {
849
- // In most cases, we should not need to snapshot this. But if this is the first time we see the collection
850
- // for whatever reason, then we do need to snapshot it.
851
- // This may result in some duplicate operations when a collection is created for the first time after
852
- // sync rules was deployed.
853
- snapshot: true
854
- });
855
- if (table.syncAny) {
856
- this.replicationLag.trackUncommittedChange(changeDocument.clusterTime == null ? null : timestampToDate(changeDocument.clusterTime));
857
- const transactionKeyValue = transactionKey(changeDocument);
858
- if (transactionKeyValue == null || lastTxnKey != transactionKeyValue) {
859
- // Very crude metric for counting transactions replicated.
860
- // We ignore operations other than basic CRUD, and ignore changes to _powersync_checkpoints.
861
- // Individual writes may not have a txnNumber, in which case we count them as separate transactions.
862
- lastTxnKey = transactionKeyValue;
863
- transactionsReplicatedMetric.add(1);
590
+ else if (changeDocument.operationType == 'insert' ||
591
+ changeDocument.operationType == 'update' ||
592
+ changeDocument.operationType == 'replace' ||
593
+ changeDocument.operationType == 'delete') {
594
+ if (waitForCheckpointLsn == null) {
595
+ using _ = tracer.span('source_checkpoint');
596
+ waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
864
597
  }
865
- const flushResult = await this.writeChange(batch, table, changeDocument);
866
- changesSinceLastCheckpoint += 1;
867
- if (flushResult != null && changesSinceLastCheckpoint >= 20_000) {
868
- // When we are catching up replication after an initial snapshot, there may be a very long delay
869
- // before we do a commit(). In that case, we need to periodically persist the resume LSN, so
870
- // we don't restart from scratch if we restart replication.
871
- // The same could apply if we need to catch up on replication after some downtime.
872
- const { comparable: lsn } = new MongoLSN({
873
- timestamp: changeDocument.clusterTime,
874
- resume_token: changeDocument._id
875
- });
876
- this.logger.info(`Updating resume LSN to ${lsn} after ${changesSinceLastCheckpoint} changes`);
877
- await batch.setResumeLsn(lsn);
878
- changesSinceLastCheckpoint = 0;
598
+ const rel = getMongoRelation(changeDocument.ns, this.connections.connectionTag);
599
+ const tables = await this.getRelations(batch, rel, {
600
+ // In most cases, we should not need to snapshot this. But if this is the first time we see the collection
601
+ // for whatever reason, then we do need to snapshot it.
602
+ // This may result in some duplicate operations when a collection is created for the first time after
603
+ // sync config was deployed.
604
+ snapshot: true
605
+ });
606
+ const tablesToReplicate = tables.filter((table) => table.syncAny);
607
+ if (tablesToReplicate.length > 0) {
608
+ this.replicationLag.trackUncommittedChange(changeDocument.clusterTime == null ? null : timestampToDate(changeDocument.clusterTime));
609
+ const transactionKeyValue = transactionKey(changeDocument);
610
+ if (transactionKeyValue == null || lastTxnKey != transactionKeyValue) {
611
+ // Very crude metric for counting transactions replicated.
612
+ // We ignore operations other than basic CRUD, and ignore changes to _powersync_checkpoints.
613
+ // Individual writes may not have a txnNumber, in which case we count them as separate transactions.
614
+ lastTxnKey = transactionKeyValue;
615
+ transactionsReplicatedMetric.add(1);
616
+ }
617
+ for (const table of tablesToReplicate) {
618
+ await this.writeChange(batch, table, changeDocument);
619
+ }
879
620
  }
880
621
  }
881
- }
882
- else if (changeDocument.operationType == 'drop') {
883
- const rel = getMongoRelation(changeDocument.ns);
884
- const table = await this.getRelation(batch, rel, {
885
- // We're "dropping" this collection, so never snapshot it.
886
- snapshot: false
887
- });
888
- if (table.syncAny) {
889
- await batch.drop([table]);
890
- this.relationCache.delete(table);
622
+ else if (changeDocument.operationType == 'drop') {
623
+ const rel = getMongoRelation(changeDocument.ns, this.connections.connectionTag);
624
+ const tables = await this.getRelations(batch, rel, {
625
+ // We're "dropping" this collection, so never snapshot it.
626
+ snapshot: false
627
+ });
628
+ const tablesToDrop = tables.filter((table) => table.syncAny);
629
+ if (tablesToDrop.length > 0) {
630
+ await batch.drop(tablesToDrop);
631
+ }
632
+ this.relationCache.delete(rel);
891
633
  }
892
- }
893
- else if (changeDocument.operationType == 'rename') {
894
- const relFrom = getMongoRelation(changeDocument.ns);
895
- const relTo = getMongoRelation(changeDocument.to);
896
- const tableFrom = await this.getRelation(batch, relFrom, {
897
- // We're "dropping" this collection, so never snapshot it.
898
- snapshot: false
899
- });
900
- if (tableFrom.syncAny) {
901
- await batch.drop([tableFrom]);
634
+ else if (changeDocument.operationType == 'rename') {
635
+ const relFrom = getMongoRelation(changeDocument.ns, this.connections.connectionTag);
636
+ const relTo = getMongoRelation(changeDocument.to, this.connections.connectionTag);
637
+ const tablesFrom = await this.getRelations(batch, relFrom, {
638
+ // We're "dropping" this collection, so never snapshot it.
639
+ snapshot: false
640
+ });
641
+ const tablesToDrop = tablesFrom.filter((table) => table.syncAny);
642
+ if (tablesToDrop.length > 0) {
643
+ await batch.drop(tablesToDrop);
644
+ }
902
645
  this.relationCache.delete(relFrom);
646
+ // Here we do need to snapshot the new table
647
+ const collection = await this.getCollectionInfo(relTo.schema, relTo.name);
648
+ await this.handleRelation(batch, relTo, {
649
+ // This is a new (renamed) collection, so always snapshot it.
650
+ snapshot: true,
651
+ collectionInfo: collection
652
+ });
903
653
  }
904
- // Here we do need to snapshot the new table
905
- const collection = await this.getCollectionInfo(relTo.schema, relTo.name);
906
- await this.handleRelation(batch, relTo, {
907
- // This is a new (renamed) collection, so always snapshot it.
908
- snapshot: true,
909
- collectionInfo: collection
910
- });
911
654
  }
655
+ if (splitDocument == null) {
656
+ // We flush and mark progress on every batch of data we receive.
657
+ // Batches are generally large (64MB or 6000 events, whichever comes first),
658
+ // so this is a good natural point to flush and mark progress.
659
+ // We avoid this when splitDocument is set, since we cannot resume in the middle of a split event.
660
+ const { comparable: lsn } = MongoLSN.fromResumeToken(resumeToken);
661
+ await batch.flush({ oldestUncommittedChange: this.replicationLag.oldestUncommittedChange });
662
+ // TODO: We should consider making this standard behavior of flush().
663
+ await batch.setResumeLsn(lsn);
664
+ }
665
+ batchSpan.end();
666
+ const durationsMicroseconds = outerSpan.end();
667
+ const duration = batchSpan.durationMillis;
668
+ this.logger.info(`Processed batch of ${events.length} changes / ${eventBatch.byteSize} bytes in ${duration}ms`, {
669
+ count: events.length,
670
+ bytes: eventBatch.byteSize,
671
+ duration,
672
+ t: durationsMicroseconds
673
+ });
674
+ outerSpan = tracer.span('batch');
912
675
  }
913
676
  });
677
+ throw new ReplicationAbortedError(`Replication stream aborted`, this.abortSignal.reason);
914
678
  }
915
679
  getReplicationLagMillis() {
916
680
  return this.replicationLag.getLagMillis();
@@ -926,29 +690,6 @@ export class ChangeStream {
926
690
  }
927
691
  }
928
692
  }
929
- function mapChangeStreamError(e) {
930
- if (isMongoNetworkTimeoutError(e)) {
931
- // This typically has an unhelpful message like "connection 2 to 159.41.94.47:27017 timed out".
932
- // We wrap the error to make it more useful.
933
- throw new DatabaseConnectionError(ErrorCode.PSYNC_S1345, `Timeout while reading MongoDB ChangeStream`, e);
934
- }
935
- else if (isMongoServerError(e) && e.codeName == 'MaxTimeMSExpired') {
936
- // maxTimeMS was reached. Example message:
937
- // MongoServerError: Executor error during aggregate command on namespace: powersync_test_data.$cmd.aggregate :: caused by :: operation exceeded time limit
938
- throw new DatabaseConnectionError(ErrorCode.PSYNC_S1345, `Timeout while reading MongoDB ChangeStream`, e);
939
- }
940
- else if (isMongoServerError(e) &&
941
- e.codeName == 'NoMatchingDocument' &&
942
- e.errmsg?.includes('post-image was not found')) {
943
- throw new ChangeStreamInvalidatedError(e.errmsg, e);
944
- }
945
- else if (isMongoServerError(e) && e.hasErrorLabel('NonResumableChangeStreamError')) {
946
- throw new ChangeStreamInvalidatedError(e.message, e);
947
- }
948
- else {
949
- throw new DatabaseConnectionError(ErrorCode.PSYNC_S1346, `Error reading MongoDB ChangeStream`, e);
950
- }
951
- }
952
693
  /**
953
694
  * Transaction key for a change stream event, used to detect transaction boundaries. Returns null if the event is not part of a transaction.
954
695
  */
@@ -958,4 +699,26 @@ function transactionKey(doc) {
958
699
  }
959
700
  return `${doc.lsid.id.toString('hex')}:${doc.txnNumber}`;
960
701
  }
702
+ /**
703
+ * Prioritize errors that are _not_ ReplicationAbortedError. Any error on either loopPromise or
704
+ * streamPromise aborts the other one, which then results in a ReplicationAbortedError, hiding the
705
+ * original cause.
706
+ */
707
+ function replicationLoopError(results) {
708
+ // 1. Prioritize not ReplicationAbortedError.
709
+ for (const result of results) {
710
+ if (result.status == 'rejected' && !(result.reason instanceof ReplicationAbortedError)) {
711
+ return result.reason;
712
+ }
713
+ }
714
+ // 2. Fallback to ReplicationAbortedError.
715
+ for (const result of results) {
716
+ if (result.status == 'rejected') {
717
+ // At this point only ReplicationAbortedError remains
718
+ return result.reason;
719
+ }
720
+ }
721
+ // 3. Should never happen, but we cover this case.
722
+ return new ReplicationAssertionError(`Replication loop exited unexpectedly`);
723
+ }
961
724
  //# sourceMappingURL=ChangeStream.js.map