@powersync/service-module-mongodb 0.15.2 → 0.15.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,66 +1,14 @@
1
- var __addDisposableResource = (this && this.__addDisposableResource) || function (env, value, async) {
2
- if (value !== null && value !== void 0) {
3
- if (typeof value !== "object" && typeof value !== "function") throw new TypeError("Object expected.");
4
- var dispose, inner;
5
- if (async) {
6
- if (!Symbol.asyncDispose) throw new TypeError("Symbol.asyncDispose is not defined.");
7
- dispose = value[Symbol.asyncDispose];
8
- }
9
- if (dispose === void 0) {
10
- if (!Symbol.dispose) throw new TypeError("Symbol.dispose is not defined.");
11
- dispose = value[Symbol.dispose];
12
- if (async) inner = dispose;
13
- }
14
- if (typeof dispose !== "function") throw new TypeError("Object not disposable.");
15
- if (inner) dispose = function() { try { inner.call(this); } catch (e) { return Promise.reject(e); } };
16
- env.stack.push({ value: value, dispose: dispose, async: async });
17
- }
18
- else if (async) {
19
- env.stack.push({ async: true });
20
- }
21
- return value;
22
- };
23
- var __disposeResources = (this && this.__disposeResources) || (function (SuppressedError) {
24
- return function (env) {
25
- function fail(e) {
26
- env.error = env.hasError ? new SuppressedError(e, env.error, "An error was suppressed during disposal.") : e;
27
- env.hasError = true;
28
- }
29
- var r, s = 0;
30
- function next() {
31
- while (r = env.stack.pop()) {
32
- try {
33
- if (!r.async && s === 1) return s = 0, env.stack.push(r), Promise.resolve().then(next);
34
- if (r.dispose) {
35
- var result = r.dispose.call(r.value);
36
- if (r.async) return s |= 2, Promise.resolve(result).then(next, function(e) { fail(e); return next(); });
37
- }
38
- else s |= 1;
39
- }
40
- catch (e) {
41
- fail(e);
42
- }
43
- }
44
- if (s === 1) return env.hasError ? Promise.reject(env.error) : Promise.resolve();
45
- if (env.hasError) throw env.error;
46
- }
47
- return next();
48
- };
49
- })(typeof SuppressedError === "function" ? SuppressedError : function (error, suppressed, message) {
50
- var e = new Error(message);
51
- return e.name = "SuppressedError", e.error = error, e.suppressed = suppressed, e;
52
- });
53
1
  import { isMongoNetworkTimeoutError, isMongoServerError, mongo } from '@powersync/lib-service-mongodb';
54
2
  import { container, DatabaseConnectionError, logger as defaultLogger, ErrorCode, ReplicationAbortedError, ReplicationAssertionError, ServiceError } from '@powersync/lib-services-framework';
55
- import { RelationCache, SaveOperationTag } from '@powersync/service-core';
3
+ import { RelationCache, ReplicationLagTracker, SaveOperationTag } from '@powersync/service-core';
56
4
  import { ReplicationMetric } from '@powersync/service-types';
57
5
  import { MongoLSN } from '../common/MongoLSN.js';
58
6
  import { PostImagesOption } from '../types/types.js';
59
7
  import { escapeRegExp } from '../utils.js';
8
+ import { trackChangeStreamBsonBytes } from './internal-mongodb-utils.js';
60
9
  import { constructAfterRecord, createCheckpoint, getCacheIdentifier, getMongoRelation, STANDALONE_CHECKPOINT_ID } from './MongoRelation.js';
61
10
  import { ChunkedSnapshotQuery } from './MongoSnapshotQuery.js';
62
11
  import { CHECKPOINTS_COLLECTION, timestampToDate } from './replication-utils.js';
63
- import { trackChangeStreamBsonBytes } from './internal-mongodb-utils.js';
64
12
  /**
65
13
  * Thrown when the change stream is not valid anymore, and replication
66
14
  * must be restarted.
@@ -86,16 +34,7 @@ export class ChangeStream {
86
34
  maxAwaitTimeMS;
87
35
  abort_signal;
88
36
  relationCache = new RelationCache(getCacheIdentifier);
89
- /**
90
- * Time of the oldest uncommitted change, according to the source db.
91
- * This is used to determine the replication lag.
92
- */
93
- oldestUncommittedChange = null;
94
- /**
95
- * Keep track of whether we have done a commit or keepalive yet.
96
- * We can only compute replication lag if isStartingReplication == false, or oldestUncommittedChange is present.
97
- */
98
- isStartingReplication = true;
37
+ replicationLag = new ReplicationLagTracker();
99
38
  checkpointStreamId = new mongo.ObjectId();
100
39
  logger;
101
40
  snapshotChunkLength;
@@ -189,101 +128,77 @@ export class ChangeStream {
189
128
  * This LSN can survive initial replication restarts.
190
129
  */
191
130
  async getSnapshotLsn() {
192
- const env_1 = { stack: [], error: void 0, hasError: false };
193
- try {
194
- const hello = await this.defaultDb.command({ hello: 1 });
195
- // Basic sanity check
196
- if (hello.msg == 'isdbgrid') {
197
- throw new ServiceError(ErrorCode.PSYNC_S1341, 'Sharded MongoDB Clusters are not supported yet (including MongoDB Serverless instances).');
131
+ const hello = await this.defaultDb.command({ hello: 1 });
132
+ // Basic sanity check
133
+ if (hello.msg == 'isdbgrid') {
134
+ throw new ServiceError(ErrorCode.PSYNC_S1341, 'Sharded MongoDB Clusters are not supported yet (including MongoDB Serverless instances).');
135
+ }
136
+ else if (hello.setName == null) {
137
+ throw new ServiceError(ErrorCode.PSYNC_S1342, 'Standalone MongoDB instances are not supported - use a replicaset.');
138
+ }
139
+ // Open a change stream just to get a resume token for later use.
140
+ // We could use clusterTime from the hello command, but that won't tell us if the
141
+ // snapshot isn't valid anymore.
142
+ // If we just use the first resumeToken from the stream, we get two potential issues:
143
+ // 1. The resumeToken may just be a wrapped clusterTime, which does not detect changes
144
+ // in source db or other stream issues.
145
+ // 2. The first actual change we get may have the same clusterTime, causing us to incorrect
146
+ // skip that event.
147
+ // Instead, we create a new checkpoint document, and wait until we get that document back in the stream.
148
+ // To avoid potential race conditions with the checkpoint creation, we create a new checkpoint document
149
+ // periodically until the timeout is reached.
150
+ const LSN_TIMEOUT_SECONDS = 60;
151
+ const LSN_CREATE_INTERVAL_SECONDS = 1;
152
+ // Create a checkpoint, and open a change stream using startAtOperationTime with the checkpoint's operationTime.
153
+ const firstCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
154
+ await using streamManager = this.openChangeStream({ lsn: firstCheckpointLsn, maxAwaitTimeMs: 0 });
155
+ const { stream } = streamManager;
156
+ const startTime = performance.now();
157
+ let lastCheckpointCreated = performance.now();
158
+ let eventsSeen = 0;
159
+ while (performance.now() - startTime < LSN_TIMEOUT_SECONDS * 1000) {
160
+ if (performance.now() - lastCheckpointCreated >= LSN_CREATE_INTERVAL_SECONDS * 1000) {
161
+ await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
162
+ lastCheckpointCreated = performance.now();
198
163
  }
199
- else if (hello.setName == null) {
200
- throw new ServiceError(ErrorCode.PSYNC_S1342, 'Standalone MongoDB instances are not supported - use a replicaset.');
164
+ // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream
165
+ const changeDocument = await stream.tryNext().catch((e) => {
166
+ throw mapChangeStreamError(e);
167
+ });
168
+ if (changeDocument == null) {
169
+ continue;
201
170
  }
202
- // Open a change stream just to get a resume token for later use.
203
- // We could use clusterTime from the hello command, but that won't tell us if the
204
- // snapshot isn't valid anymore.
205
- // If we just use the first resumeToken from the stream, we get two potential issues:
206
- // 1. The resumeToken may just be a wrapped clusterTime, which does not detect changes
207
- // in source db or other stream issues.
208
- // 2. The first actual change we get may have the same clusterTime, causing us to incorrect
209
- // skip that event.
210
- // Instead, we create a new checkpoint document, and wait until we get that document back in the stream.
211
- // To avoid potential race conditions with the checkpoint creation, we create a new checkpoint document
212
- // periodically until the timeout is reached.
213
- const LSN_TIMEOUT_SECONDS = 60;
214
- const LSN_CREATE_INTERVAL_SECONDS = 1;
215
- // Create a checkpoint, and open a change stream using startAtOperationTime with the checkpoint's operationTime.
216
- const firstCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
217
- const streamManager = __addDisposableResource(env_1, this.openChangeStream({ lsn: firstCheckpointLsn, maxAwaitTimeMs: 0 }), true);
218
- const { stream } = streamManager;
219
- const startTime = performance.now();
220
- let lastCheckpointCreated = performance.now();
221
- let eventsSeen = 0;
222
- while (performance.now() - startTime < LSN_TIMEOUT_SECONDS * 1000) {
223
- if (performance.now() - lastCheckpointCreated >= LSN_CREATE_INTERVAL_SECONDS * 1000) {
224
- await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
225
- lastCheckpointCreated = performance.now();
226
- }
227
- // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream
228
- const changeDocument = await stream.tryNext().catch((e) => {
229
- throw mapChangeStreamError(e);
230
- });
231
- if (changeDocument == null) {
171
+ const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
172
+ if (ns?.coll == CHECKPOINTS_COLLECTION && 'documentKey' in changeDocument) {
173
+ const checkpointId = changeDocument.documentKey._id;
174
+ if (!this.checkpointStreamId.equals(checkpointId)) {
232
175
  continue;
233
176
  }
234
- const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
235
- if (ns?.coll == CHECKPOINTS_COLLECTION && 'documentKey' in changeDocument) {
236
- const checkpointId = changeDocument.documentKey._id;
237
- if (!this.checkpointStreamId.equals(checkpointId)) {
238
- continue;
239
- }
240
- const { comparable: lsn } = new MongoLSN({
241
- timestamp: changeDocument.clusterTime,
242
- resume_token: changeDocument._id
243
- });
244
- return lsn;
245
- }
246
- eventsSeen += 1;
177
+ const { comparable: lsn } = new MongoLSN({
178
+ timestamp: changeDocument.clusterTime,
179
+ resume_token: changeDocument._id
180
+ });
181
+ return lsn;
247
182
  }
248
- // Could happen if there is a very large replication lag?
249
- throw new ServiceError(ErrorCode.PSYNC_S1301, `Timeout after while waiting for checkpoint document for ${LSN_TIMEOUT_SECONDS}s. Streamed events = ${eventsSeen}`);
250
- }
251
- catch (e_1) {
252
- env_1.error = e_1;
253
- env_1.hasError = true;
254
- }
255
- finally {
256
- const result_1 = __disposeResources(env_1);
257
- if (result_1)
258
- await result_1;
183
+ eventsSeen += 1;
259
184
  }
185
+ // Could happen if there is a very large replication lag?
186
+ throw new ServiceError(ErrorCode.PSYNC_S1301, `Timeout after while waiting for checkpoint document for ${LSN_TIMEOUT_SECONDS}s. Streamed events = ${eventsSeen}`);
260
187
  }
261
188
  /**
262
189
  * Given a snapshot LSN, validate that we can read from it, by opening a change stream.
263
190
  */
264
191
  async validateSnapshotLsn(lsn) {
265
- const env_2 = { stack: [], error: void 0, hasError: false };
192
+ await using streamManager = this.openChangeStream({ lsn: lsn, maxAwaitTimeMs: 0 });
193
+ const { stream } = streamManager;
266
194
  try {
267
- const streamManager = __addDisposableResource(env_2, this.openChangeStream({ lsn: lsn, maxAwaitTimeMs: 0 }), true);
268
- const { stream } = streamManager;
269
- try {
270
- // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream
271
- await stream.tryNext();
272
- }
273
- catch (e) {
274
- // Note: A timeout here is not handled as a ChangeStreamInvalidatedError, even though
275
- // we possibly cannot recover from it.
276
- throw mapChangeStreamError(e);
277
- }
278
- }
279
- catch (e_2) {
280
- env_2.error = e_2;
281
- env_2.hasError = true;
195
+ // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream
196
+ await stream.tryNext();
282
197
  }
283
- finally {
284
- const result_2 = __disposeResources(env_2);
285
- if (result_2)
286
- await result_2;
198
+ catch (e) {
199
+ // Note: A timeout here is not handled as a ChangeStreamInvalidatedError, even though
200
+ // we possibly cannot recover from it.
201
+ throw mapChangeStreamError(e);
287
202
  }
288
203
  }
289
204
  async initialReplication(snapshotLsn) {
@@ -421,80 +336,68 @@ export class ChangeStream {
421
336
  }
422
337
  }
423
338
  async snapshotTable(batch, table) {
424
- const env_3 = { stack: [], error: void 0, hasError: false };
425
- try {
426
- const rowsReplicatedMetric = this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED);
427
- const bytesReplicatedMetric = this.metrics.getCounter(ReplicationMetric.DATA_REPLICATED_BYTES);
428
- const chunksReplicatedMetric = this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED);
429
- const totalEstimatedCount = await this.estimatedCountNumber(table);
430
- let at = table.snapshotStatus?.replicatedCount ?? 0;
431
- const db = this.client.db(table.schema);
432
- const collection = db.collection(table.name);
433
- const query = __addDisposableResource(env_3, new ChunkedSnapshotQuery({
434
- collection,
435
- key: table.snapshotStatus?.lastKey,
436
- batchSize: this.snapshotChunkLength
437
- }), true);
438
- if (query.lastKey != null) {
439
- this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming at _id > ${query.lastKey}`);
339
+ const rowsReplicatedMetric = this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED);
340
+ const bytesReplicatedMetric = this.metrics.getCounter(ReplicationMetric.DATA_REPLICATED_BYTES);
341
+ const chunksReplicatedMetric = this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED);
342
+ const totalEstimatedCount = await this.estimatedCountNumber(table);
343
+ let at = table.snapshotStatus?.replicatedCount ?? 0;
344
+ const db = this.client.db(table.schema);
345
+ const collection = db.collection(table.name);
346
+ await using query = new ChunkedSnapshotQuery({
347
+ collection,
348
+ key: table.snapshotStatus?.lastKey,
349
+ batchSize: this.snapshotChunkLength
350
+ });
351
+ if (query.lastKey != null) {
352
+ this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming at _id > ${query.lastKey}`);
353
+ }
354
+ else {
355
+ this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
356
+ }
357
+ let lastBatch = performance.now();
358
+ let nextChunkPromise = query.nextChunk();
359
+ while (true) {
360
+ const { docs: docBatch, lastKey, bytes: chunkBytes } = await nextChunkPromise;
361
+ if (docBatch.length == 0) {
362
+ // No more data - stop iterating
363
+ break;
440
364
  }
441
- else {
442
- this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
365
+ bytesReplicatedMetric.add(chunkBytes);
366
+ chunksReplicatedMetric.add(1);
367
+ if (this.abort_signal.aborted) {
368
+ throw new ReplicationAbortedError(`Aborted initial replication`, this.abort_signal.reason);
443
369
  }
444
- let lastBatch = performance.now();
445
- let nextChunkPromise = query.nextChunk();
446
- while (true) {
447
- const { docs: docBatch, lastKey, bytes: chunkBytes } = await nextChunkPromise;
448
- if (docBatch.length == 0) {
449
- // No more data - stop iterating
450
- break;
451
- }
452
- bytesReplicatedMetric.add(chunkBytes);
453
- chunksReplicatedMetric.add(1);
454
- if (this.abort_signal.aborted) {
455
- throw new ReplicationAbortedError(`Aborted initial replication`, this.abort_signal.reason);
456
- }
457
- // Pre-fetch next batch, so that we can read and write concurrently
458
- nextChunkPromise = query.nextChunk();
459
- for (let document of docBatch) {
460
- const record = this.constructAfterRecord(document);
461
- // This auto-flushes when the batch reaches its size limit
462
- await batch.save({
463
- tag: SaveOperationTag.INSERT,
464
- sourceTable: table,
465
- before: undefined,
466
- beforeReplicaId: undefined,
467
- after: record,
468
- afterReplicaId: document._id
469
- });
470
- }
471
- // Important: flush before marking progress
472
- await batch.flush();
473
- at += docBatch.length;
474
- rowsReplicatedMetric.add(docBatch.length);
475
- table = await batch.updateTableProgress(table, {
476
- lastKey,
477
- replicatedCount: at,
478
- totalEstimatedCount: totalEstimatedCount
370
+ // Pre-fetch next batch, so that we can read and write concurrently
371
+ nextChunkPromise = query.nextChunk();
372
+ for (let document of docBatch) {
373
+ const record = this.constructAfterRecord(document);
374
+ // This auto-flushes when the batch reaches its size limit
375
+ await batch.save({
376
+ tag: SaveOperationTag.INSERT,
377
+ sourceTable: table,
378
+ before: undefined,
379
+ beforeReplicaId: undefined,
380
+ after: record,
381
+ afterReplicaId: document._id
479
382
  });
480
- this.relationCache.update(table);
481
- const duration = performance.now() - lastBatch;
482
- lastBatch = performance.now();
483
- this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} in ${duration.toFixed(0)}ms`);
484
- this.touch();
485
383
  }
486
- // In case the loop was interrupted, make sure we await the last promise.
487
- await nextChunkPromise;
488
- }
489
- catch (e_3) {
490
- env_3.error = e_3;
491
- env_3.hasError = true;
492
- }
493
- finally {
494
- const result_3 = __disposeResources(env_3);
495
- if (result_3)
496
- await result_3;
384
+ // Important: flush before marking progress
385
+ await batch.flush();
386
+ at += docBatch.length;
387
+ rowsReplicatedMetric.add(docBatch.length);
388
+ table = await batch.updateTableProgress(table, {
389
+ lastKey,
390
+ replicatedCount: at,
391
+ totalEstimatedCount: totalEstimatedCount
392
+ });
393
+ this.relationCache.update(table);
394
+ const duration = performance.now() - lastBatch;
395
+ lastBatch = performance.now();
396
+ this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} in ${duration.toFixed(0)}ms`);
397
+ this.touch();
497
398
  }
399
+ // In case the loop was interrupted, make sure we await the last promise.
400
+ await nextChunkPromise;
498
401
  }
499
402
  async getRelation(batch, descriptor, options) {
500
403
  const existing = this.relationCache.get(descriptor);
@@ -750,292 +653,267 @@ export class ChangeStream {
750
653
  // We get a complete postimage for every change, so we don't need to store the current data.
751
654
  storeCurrentData: false
752
655
  }, async (batch) => {
753
- const env_4 = { stack: [], error: void 0, hasError: false };
754
- try {
755
- const { resumeFromLsn } = batch;
756
- if (resumeFromLsn == null) {
757
- throw new ReplicationAssertionError(`No LSN found to resume from`);
758
- }
759
- const lastLsn = MongoLSN.fromSerialized(resumeFromLsn);
760
- const startAfter = lastLsn?.timestamp;
761
- // It is normal for this to be a minute or two old when there is a low volume
762
- // of ChangeStream events.
763
- const tokenAgeSeconds = Math.round((Date.now() - timestampToDate(startAfter).getTime()) / 1000);
764
- this.logger.info(`Resume streaming at ${startAfter?.inspect()} / ${lastLsn} | Token age: ${tokenAgeSeconds}s`);
765
- const streamManager = __addDisposableResource(env_4, this.openChangeStream({ lsn: resumeFromLsn }), true);
766
- const { stream, filters } = streamManager;
656
+ const { resumeFromLsn } = batch;
657
+ if (resumeFromLsn == null) {
658
+ throw new ReplicationAssertionError(`No LSN found to resume from`);
659
+ }
660
+ const lastLsn = MongoLSN.fromSerialized(resumeFromLsn);
661
+ const startAfter = lastLsn?.timestamp;
662
+ // It is normal for this to be a minute or two old when there is a low volume
663
+ // of ChangeStream events.
664
+ const tokenAgeSeconds = Math.round((Date.now() - timestampToDate(startAfter).getTime()) / 1000);
665
+ this.logger.info(`Resume streaming at ${startAfter?.inspect()} / ${lastLsn} | Token age: ${tokenAgeSeconds}s`);
666
+ await using streamManager = this.openChangeStream({ lsn: resumeFromLsn });
667
+ const { stream, filters } = streamManager;
668
+ if (this.abort_signal.aborted) {
669
+ await stream.close();
670
+ return;
671
+ }
672
+ trackChangeStreamBsonBytes(stream, (bytes) => {
673
+ bytesReplicatedMetric.add(bytes);
674
+ // Each of these represent a single response message from MongoDB.
675
+ chunksReplicatedMetric.add(1);
676
+ });
677
+ // Always start with a checkpoint.
678
+ // This helps us to clear errors when restarting, even if there is
679
+ // no data to replicate.
680
+ let waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
681
+ let splitDocument = null;
682
+ let flexDbNameWorkaroundLogged = false;
683
+ let changesSinceLastCheckpoint = 0;
684
+ let lastEmptyResume = performance.now();
685
+ let lastTxnKey = null;
686
+ while (true) {
767
687
  if (this.abort_signal.aborted) {
768
- await stream.close();
769
- return;
688
+ break;
770
689
  }
771
- trackChangeStreamBsonBytes(stream, (bytes) => {
772
- bytesReplicatedMetric.add(bytes);
773
- // Each of these represent a single response message from MongoDB.
774
- chunksReplicatedMetric.add(1);
690
+ const originalChangeDocument = await stream.tryNext().catch((e) => {
691
+ throw mapChangeStreamError(e);
775
692
  });
776
- // Always start with a checkpoint.
777
- // This helps us to clear errors when restarting, even if there is
778
- // no data to replicate.
779
- let waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
780
- let splitDocument = null;
781
- let flexDbNameWorkaroundLogged = false;
782
- let changesSinceLastCheckpoint = 0;
783
- let lastEmptyResume = performance.now();
784
- let lastTxnKey = null;
785
- while (true) {
786
- if (this.abort_signal.aborted) {
787
- break;
693
+ // The stream was closed, we will only ever receive `null` from it
694
+ if (!originalChangeDocument && stream.closed) {
695
+ break;
696
+ }
697
+ if (this.abort_signal.aborted) {
698
+ break;
699
+ }
700
+ if (originalChangeDocument == null) {
701
+ // We get a new null document after `maxAwaitTimeMS` if there were no other events.
702
+ // In this case, stream.resumeToken is the resume token associated with the last response.
703
+ // stream.resumeToken is not updated if stream.tryNext() returns data, while stream.next()
704
+ // does update it.
705
+ // From observed behavior, the actual resumeToken changes around once every 10 seconds.
706
+ // If we don't update it on empty events, we do keep consistency, but resuming the stream
707
+ // with old tokens may cause connection timeouts.
708
+ // We throttle this further by only persisting a keepalive once a minute.
709
+ // We add an additional check for waitForCheckpointLsn == null, to make sure we're not
710
+ // doing a keepalive in the middle of a transaction.
711
+ if (waitForCheckpointLsn == null && performance.now() - lastEmptyResume > 60_000) {
712
+ const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(stream.resumeToken);
713
+ await batch.keepalive(lsn);
714
+ this.touch();
715
+ lastEmptyResume = performance.now();
716
+ // Log the token update. This helps as a general "replication is still active" message in the logs.
717
+ // This token would typically be around 10s behind.
718
+ this.logger.info(`Idle change stream. Persisted resumeToken for ${timestampToDate(timestamp).toISOString()}`);
719
+ this.replicationLag.markStarted();
788
720
  }
789
- const originalChangeDocument = await stream.tryNext().catch((e) => {
790
- throw mapChangeStreamError(e);
791
- });
792
- // The stream was closed, we will only ever receive `null` from it
793
- if (!originalChangeDocument && stream.closed) {
794
- break;
721
+ continue;
722
+ }
723
+ this.touch();
724
+ if (startAfter != null && originalChangeDocument.clusterTime?.lte(startAfter)) {
725
+ continue;
726
+ }
727
+ let changeDocument = originalChangeDocument;
728
+ if (originalChangeDocument?.splitEvent != null) {
729
+ // Handle split events from $changeStreamSplitLargeEvent.
730
+ // This is only relevant for very large update operations.
731
+ const splitEvent = originalChangeDocument?.splitEvent;
732
+ if (splitDocument == null) {
733
+ splitDocument = originalChangeDocument;
795
734
  }
796
- if (this.abort_signal.aborted) {
797
- break;
735
+ else {
736
+ splitDocument = Object.assign(splitDocument, originalChangeDocument);
798
737
  }
799
- if (originalChangeDocument == null) {
800
- // We get a new null document after `maxAwaitTimeMS` if there were no other events.
801
- // In this case, stream.resumeToken is the resume token associated with the last response.
802
- // stream.resumeToken is not updated if stream.tryNext() returns data, while stream.next()
803
- // does update it.
804
- // From observed behavior, the actual resumeToken changes around once every 10 seconds.
805
- // If we don't update it on empty events, we do keep consistency, but resuming the stream
806
- // with old tokens may cause connection timeouts.
807
- // We throttle this further by only persisting a keepalive once a minute.
808
- // We add an additional check for waitForCheckpointLsn == null, to make sure we're not
809
- // doing a keepalive in the middle of a transaction.
810
- if (waitForCheckpointLsn == null && performance.now() - lastEmptyResume > 60_000) {
811
- const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(stream.resumeToken);
812
- await batch.keepalive(lsn);
813
- this.touch();
814
- lastEmptyResume = performance.now();
815
- // Log the token update. This helps as a general "replication is still active" message in the logs.
816
- // This token would typically be around 10s behind.
817
- this.logger.info(`Idle change stream. Persisted resumeToken for ${timestampToDate(timestamp).toISOString()}`);
818
- this.isStartingReplication = false;
819
- }
820
- continue;
738
+ if (splitEvent.fragment == splitEvent.of) {
739
+ // Got all fragments
740
+ changeDocument = splitDocument;
741
+ splitDocument = null;
821
742
  }
822
- this.touch();
823
- if (startAfter != null && originalChangeDocument.clusterTime?.lte(startAfter)) {
743
+ else {
744
+ // Wait for more fragments
824
745
  continue;
825
746
  }
826
- let changeDocument = originalChangeDocument;
827
- if (originalChangeDocument?.splitEvent != null) {
828
- // Handle split events from $changeStreamSplitLargeEvent.
829
- // This is only relevant for very large update operations.
830
- const splitEvent = originalChangeDocument?.splitEvent;
831
- if (splitDocument == null) {
832
- splitDocument = originalChangeDocument;
833
- }
834
- else {
835
- splitDocument = Object.assign(splitDocument, originalChangeDocument);
836
- }
837
- if (splitEvent.fragment == splitEvent.of) {
838
- // Got all fragments
839
- changeDocument = splitDocument;
840
- splitDocument = null;
841
- }
842
- else {
843
- // Wait for more fragments
844
- continue;
845
- }
747
+ }
748
+ else if (splitDocument != null) {
749
+ // We were waiting for fragments, but got a different event
750
+ throw new ReplicationAssertionError(`Incomplete splitEvent: ${JSON.stringify(splitDocument.splitEvent)}`);
751
+ }
752
+ if (!filters.multipleDatabases &&
753
+ 'ns' in changeDocument &&
754
+ changeDocument.ns.db != this.defaultDb.databaseName &&
755
+ changeDocument.ns.db.endsWith(`_${this.defaultDb.databaseName}`)) {
756
+ // When all of the following conditions are met:
757
+ // 1. We're replicating from an Atlas Flex instance.
758
+ // 2. There were changestream events recorded while the PowerSync service is paused.
759
+ // 3. We're only replicating from a single database.
760
+ // Then we've observed an ns with for example {db: '67b83e86cd20730f1e766dde_ps'},
761
+ // instead of the expected {db: 'ps'}.
762
+ // We correct this.
763
+ changeDocument.ns.db = this.defaultDb.databaseName;
764
+ if (!flexDbNameWorkaroundLogged) {
765
+ flexDbNameWorkaroundLogged = true;
766
+ this.logger.warn(`Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.`);
846
767
  }
847
- else if (splitDocument != null) {
848
- // We were waiting for fragments, but got a different event
849
- throw new ReplicationAssertionError(`Incomplete splitEvent: ${JSON.stringify(splitDocument.splitEvent)}`);
768
+ }
769
+ const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
770
+ if (ns?.coll == CHECKPOINTS_COLLECTION) {
771
+ /**
772
+ * Dropping the database does not provide an `invalidate` event.
773
+ * We typically would receive `drop` events for the collection which we
774
+ * would process below.
775
+ *
776
+ * However we don't commit the LSN after collections are dropped.
777
+ * The prevents the `startAfter` or `resumeToken` from advancing past the drop events.
778
+ * The stream also closes after the drop events.
779
+ * This causes an infinite loop of processing the collection drop events.
780
+ *
781
+ * This check here invalidates the change stream if our `_checkpoints` collection
782
+ * is dropped. This allows for detecting when the DB is dropped.
783
+ */
784
+ if (changeDocument.operationType == 'drop') {
785
+ throw new ChangeStreamInvalidatedError('Internal collections have been dropped', new Error('_checkpoints collection was dropped'));
850
786
  }
851
- if (!filters.multipleDatabases &&
852
- 'ns' in changeDocument &&
853
- changeDocument.ns.db != this.defaultDb.databaseName &&
854
- changeDocument.ns.db.endsWith(`_${this.defaultDb.databaseName}`)) {
855
- // When all of the following conditions are met:
856
- // 1. We're replicating from an Atlas Flex instance.
857
- // 2. There were changestream events recorded while the PowerSync service is paused.
858
- // 3. We're only replicating from a single database.
859
- // Then we've observed an ns with for example {db: '67b83e86cd20730f1e766dde_ps'},
860
- // instead of the expected {db: 'ps'}.
861
- // We correct this.
862
- changeDocument.ns.db = this.defaultDb.databaseName;
863
- if (!flexDbNameWorkaroundLogged) {
864
- flexDbNameWorkaroundLogged = true;
865
- this.logger.warn(`Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.`);
866
- }
787
+ if (!(changeDocument.operationType == 'insert' ||
788
+ changeDocument.operationType == 'update' ||
789
+ changeDocument.operationType == 'replace')) {
790
+ continue;
867
791
  }
868
- const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
869
- if (ns?.coll == CHECKPOINTS_COLLECTION) {
870
- /**
871
- * Dropping the database does not provide an `invalidate` event.
872
- * We typically would receive `drop` events for the collection which we
873
- * would process below.
874
- *
875
- * However we don't commit the LSN after collections are dropped.
876
- * The prevents the `startAfter` or `resumeToken` from advancing past the drop events.
877
- * The stream also closes after the drop events.
878
- * This causes an infinite loop of processing the collection drop events.
879
- *
880
- * This check here invalidates the change stream if our `_checkpoints` collection
881
- * is dropped. This allows for detecting when the DB is dropped.
882
- */
883
- if (changeDocument.operationType == 'drop') {
884
- throw new ChangeStreamInvalidatedError('Internal collections have been dropped', new Error('_checkpoints collection was dropped'));
885
- }
886
- if (!(changeDocument.operationType == 'insert' ||
887
- changeDocument.operationType == 'update' ||
888
- changeDocument.operationType == 'replace')) {
889
- continue;
890
- }
891
- // We handle two types of checkpoint events:
892
- // 1. "Standalone" checkpoints, typically write checkpoints. We want to process these
893
- // immediately, regardless of where they were created.
894
- // 2. "Batch" checkpoints for the current stream. This is used as a form of dynamic rate
895
- // limiting of commits, so we specifically want to exclude checkpoints from other streams.
896
- //
897
- // It may be useful to also throttle commits due to standalone checkpoints in the future.
898
- // However, these typically have a much lower rate than batch checkpoints, so we don't do that for now.
899
- const checkpointId = changeDocument.documentKey._id;
900
- if (checkpointId == STANDALONE_CHECKPOINT_ID) {
901
- // Standalone / write checkpoint received.
902
- // When we are caught up, commit immediately to keep write checkpoint latency low.
903
- // Once there is already a batch checkpoint pending, or the driver has buffered more
904
- // change stream events, collapse standalone checkpoints into the normal batch
905
- // checkpoint flow to avoid commit churn under sustained load.
906
- if (waitForCheckpointLsn != null || this.getBufferedChangeCount(stream) > 0) {
907
- if (waitForCheckpointLsn == null) {
908
- waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
909
- }
910
- continue;
792
+ // We handle two types of checkpoint events:
793
+ // 1. "Standalone" checkpoints, typically write checkpoints. We want to process these
794
+ // immediately, regardless of where they were created.
795
+ // 2. "Batch" checkpoints for the current stream. This is used as a form of dynamic rate
796
+ // limiting of commits, so we specifically want to exclude checkpoints from other streams.
797
+ //
798
+ // It may be useful to also throttle commits due to standalone checkpoints in the future.
799
+ // However, these typically have a much lower rate than batch checkpoints, so we don't do that for now.
800
+ const checkpointId = changeDocument.documentKey._id;
801
+ if (checkpointId == STANDALONE_CHECKPOINT_ID) {
802
+ // Standalone / write checkpoint received.
803
+ // When we are caught up, commit immediately to keep write checkpoint latency low.
804
+ // Once there is already a batch checkpoint pending, or the driver has buffered more
805
+ // change stream events, collapse standalone checkpoints into the normal batch
806
+ // checkpoint flow to avoid commit churn under sustained load.
807
+ if (waitForCheckpointLsn != null || this.getBufferedChangeCount(stream) > 0) {
808
+ if (waitForCheckpointLsn == null) {
809
+ waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
911
810
  }
912
- }
913
- else if (!this.checkpointStreamId.equals(checkpointId)) {
914
811
  continue;
915
812
  }
916
- const { comparable: lsn } = new MongoLSN({
917
- timestamp: changeDocument.clusterTime,
918
- resume_token: changeDocument._id
919
- });
920
- if (batch.lastCheckpointLsn != null && lsn < batch.lastCheckpointLsn) {
921
- // Checkpoint out of order - should never happen with MongoDB.
922
- // If it does happen, we throw an error to stop the replication - restarting should recover.
923
- // Since we use batch.lastCheckpointLsn for the next resumeAfter, this should not result in an infinite loop.
924
- // Originally a workaround for https://jira.mongodb.org/browse/NODE-7042.
925
- // This has been fixed in the driver in the meantime, but we still keep this as a safety-check.
926
- throw new ReplicationAssertionError(`Change resumeToken ${changeDocument._id._data} (${timestampToDate(changeDocument.clusterTime).toISOString()}) is less than last checkpoint LSN ${batch.lastCheckpointLsn}. Restarting replication.`);
927
- }
928
- if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) {
929
- waitForCheckpointLsn = null;
930
- }
931
- const { checkpointBlocked } = await batch.commit(lsn, {
932
- oldestUncommittedChange: this.oldestUncommittedChange
933
- });
934
- if (!checkpointBlocked) {
935
- this.oldestUncommittedChange = null;
936
- this.isStartingReplication = false;
937
- changesSinceLastCheckpoint = 0;
938
- }
939
813
  }
940
- else if (changeDocument.operationType == 'insert' ||
941
- changeDocument.operationType == 'update' ||
942
- changeDocument.operationType == 'replace' ||
943
- changeDocument.operationType == 'delete') {
944
- if (waitForCheckpointLsn == null) {
945
- waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
814
+ else if (!this.checkpointStreamId.equals(checkpointId)) {
815
+ continue;
816
+ }
817
+ const { comparable: lsn } = new MongoLSN({
818
+ timestamp: changeDocument.clusterTime,
819
+ resume_token: changeDocument._id
820
+ });
821
+ if (batch.lastCheckpointLsn != null && lsn < batch.lastCheckpointLsn) {
822
+ // Checkpoint out of order - should never happen with MongoDB.
823
+ // If it does happen, we throw an error to stop the replication - restarting should recover.
824
+ // Since we use batch.lastCheckpointLsn for the next resumeAfter, this should not result in an infinite loop.
825
+ // Originally a workaround for https://jira.mongodb.org/browse/NODE-7042.
826
+ // This has been fixed in the driver in the meantime, but we still keep this as a safety-check.
827
+ throw new ReplicationAssertionError(`Change resumeToken ${changeDocument._id._data} (${timestampToDate(changeDocument.clusterTime).toISOString()}) is less than last checkpoint LSN ${batch.lastCheckpointLsn}. Restarting replication.`);
828
+ }
829
+ if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) {
830
+ waitForCheckpointLsn = null;
831
+ }
832
+ const { checkpointBlocked } = await batch.commit(lsn, {
833
+ oldestUncommittedChange: this.replicationLag.oldestUncommittedChange
834
+ });
835
+ if (!checkpointBlocked) {
836
+ this.replicationLag.markCommitted();
837
+ changesSinceLastCheckpoint = 0;
838
+ }
839
+ }
840
+ else if (changeDocument.operationType == 'insert' ||
841
+ changeDocument.operationType == 'update' ||
842
+ changeDocument.operationType == 'replace' ||
843
+ changeDocument.operationType == 'delete') {
844
+ if (waitForCheckpointLsn == null) {
845
+ waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
846
+ }
847
+ const rel = getMongoRelation(changeDocument.ns);
848
+ const table = await this.getRelation(batch, rel, {
849
+ // In most cases, we should not need to snapshot this. But if this is the first time we see the collection
850
+ // for whatever reason, then we do need to snapshot it.
851
+ // This may result in some duplicate operations when a collection is created for the first time after
852
+ // sync rules was deployed.
853
+ snapshot: true
854
+ });
855
+ if (table.syncAny) {
856
+ this.replicationLag.trackUncommittedChange(changeDocument.clusterTime == null ? null : timestampToDate(changeDocument.clusterTime));
857
+ const transactionKeyValue = transactionKey(changeDocument);
858
+ if (transactionKeyValue == null || lastTxnKey != transactionKeyValue) {
859
+ // Very crude metric for counting transactions replicated.
860
+ // We ignore operations other than basic CRUD, and ignore changes to _powersync_checkpoints.
861
+ // Individual writes may not have a txnNumber, in which case we count them as separate transactions.
862
+ lastTxnKey = transactionKeyValue;
863
+ transactionsReplicatedMetric.add(1);
946
864
  }
947
- const rel = getMongoRelation(changeDocument.ns);
948
- const table = await this.getRelation(batch, rel, {
949
- // In most cases, we should not need to snapshot this. But if this is the first time we see the collection
950
- // for whatever reason, then we do need to snapshot it.
951
- // This may result in some duplicate operations when a collection is created for the first time after
952
- // sync rules was deployed.
953
- snapshot: true
954
- });
955
- if (table.syncAny) {
956
- if (this.oldestUncommittedChange == null && changeDocument.clusterTime != null) {
957
- this.oldestUncommittedChange = timestampToDate(changeDocument.clusterTime);
958
- }
959
- const transactionKeyValue = transactionKey(changeDocument);
960
- if (transactionKeyValue == null || lastTxnKey != transactionKeyValue) {
961
- // Very crude metric for counting transactions replicated.
962
- // We ignore operations other than basic CRUD, and ignore changes to _powersync_checkpoints.
963
- // Individual writes may not have a txnNumber, in which case we count them as separate transactions.
964
- lastTxnKey = transactionKeyValue;
965
- transactionsReplicatedMetric.add(1);
966
- }
967
- const flushResult = await this.writeChange(batch, table, changeDocument);
968
- changesSinceLastCheckpoint += 1;
969
- if (flushResult != null && changesSinceLastCheckpoint >= 20_000) {
970
- // When we are catching up replication after an initial snapshot, there may be a very long delay
971
- // before we do a commit(). In that case, we need to periodically persist the resume LSN, so
972
- // we don't restart from scratch if we restart replication.
973
- // The same could apply if we need to catch up on replication after some downtime.
974
- const { comparable: lsn } = new MongoLSN({
975
- timestamp: changeDocument.clusterTime,
976
- resume_token: changeDocument._id
977
- });
978
- this.logger.info(`Updating resume LSN to ${lsn} after ${changesSinceLastCheckpoint} changes`);
979
- await batch.setResumeLsn(lsn);
980
- changesSinceLastCheckpoint = 0;
981
- }
865
+ const flushResult = await this.writeChange(batch, table, changeDocument);
866
+ changesSinceLastCheckpoint += 1;
867
+ if (flushResult != null && changesSinceLastCheckpoint >= 20_000) {
868
+ // When we are catching up replication after an initial snapshot, there may be a very long delay
869
+ // before we do a commit(). In that case, we need to periodically persist the resume LSN, so
870
+ // we don't restart from scratch if we restart replication.
871
+ // The same could apply if we need to catch up on replication after some downtime.
872
+ const { comparable: lsn } = new MongoLSN({
873
+ timestamp: changeDocument.clusterTime,
874
+ resume_token: changeDocument._id
875
+ });
876
+ this.logger.info(`Updating resume LSN to ${lsn} after ${changesSinceLastCheckpoint} changes`);
877
+ await batch.setResumeLsn(lsn);
878
+ changesSinceLastCheckpoint = 0;
982
879
  }
983
880
  }
984
- else if (changeDocument.operationType == 'drop') {
985
- const rel = getMongoRelation(changeDocument.ns);
986
- const table = await this.getRelation(batch, rel, {
987
- // We're "dropping" this collection, so never snapshot it.
988
- snapshot: false
989
- });
990
- if (table.syncAny) {
991
- await batch.drop([table]);
992
- this.relationCache.delete(table);
993
- }
881
+ }
882
+ else if (changeDocument.operationType == 'drop') {
883
+ const rel = getMongoRelation(changeDocument.ns);
884
+ const table = await this.getRelation(batch, rel, {
885
+ // We're "dropping" this collection, so never snapshot it.
886
+ snapshot: false
887
+ });
888
+ if (table.syncAny) {
889
+ await batch.drop([table]);
890
+ this.relationCache.delete(table);
994
891
  }
995
- else if (changeDocument.operationType == 'rename') {
996
- const relFrom = getMongoRelation(changeDocument.ns);
997
- const relTo = getMongoRelation(changeDocument.to);
998
- const tableFrom = await this.getRelation(batch, relFrom, {
999
- // We're "dropping" this collection, so never snapshot it.
1000
- snapshot: false
1001
- });
1002
- if (tableFrom.syncAny) {
1003
- await batch.drop([tableFrom]);
1004
- this.relationCache.delete(relFrom);
1005
- }
1006
- // Here we do need to snapshot the new table
1007
- const collection = await this.getCollectionInfo(relTo.schema, relTo.name);
1008
- await this.handleRelation(batch, relTo, {
1009
- // This is a new (renamed) collection, so always snapshot it.
1010
- snapshot: true,
1011
- collectionInfo: collection
1012
- });
892
+ }
893
+ else if (changeDocument.operationType == 'rename') {
894
+ const relFrom = getMongoRelation(changeDocument.ns);
895
+ const relTo = getMongoRelation(changeDocument.to);
896
+ const tableFrom = await this.getRelation(batch, relFrom, {
897
+ // We're "dropping" this collection, so never snapshot it.
898
+ snapshot: false
899
+ });
900
+ if (tableFrom.syncAny) {
901
+ await batch.drop([tableFrom]);
902
+ this.relationCache.delete(relFrom);
1013
903
  }
904
+ // Here we do need to snapshot the new table
905
+ const collection = await this.getCollectionInfo(relTo.schema, relTo.name);
906
+ await this.handleRelation(batch, relTo, {
907
+ // This is a new (renamed) collection, so always snapshot it.
908
+ snapshot: true,
909
+ collectionInfo: collection
910
+ });
1014
911
  }
1015
912
  }
1016
- catch (e_4) {
1017
- env_4.error = e_4;
1018
- env_4.hasError = true;
1019
- }
1020
- finally {
1021
- const result_4 = __disposeResources(env_4);
1022
- if (result_4)
1023
- await result_4;
1024
- }
1025
913
  });
1026
914
  }
1027
- async getReplicationLagMillis() {
1028
- if (this.oldestUncommittedChange == null) {
1029
- if (this.isStartingReplication) {
1030
- // We don't have anything to compute replication lag with yet.
1031
- return undefined;
1032
- }
1033
- else {
1034
- // We don't have any uncommitted changes, so replication is up-to-date.
1035
- return 0;
1036
- }
1037
- }
1038
- return Date.now() - this.oldestUncommittedChange.getTime();
915
+ getReplicationLagMillis() {
916
+ return this.replicationLag.getLagMillis();
1039
917
  }
1040
918
  lastTouchedAt = performance.now();
1041
919
  touch() {