@powersync/service-module-mongodb 0.15.4 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/CHANGELOG.md +34 -0
  2. package/dist/replication/ChangeStream.d.ts +6 -6
  3. package/dist/replication/ChangeStream.js +300 -322
  4. package/dist/replication/ChangeStream.js.map +1 -1
  5. package/dist/replication/ChangeStreamReplicationJob.js +2 -2
  6. package/dist/replication/ChangeStreamReplicationJob.js.map +1 -1
  7. package/dist/replication/JsonBufferWriter.d.ts +80 -0
  8. package/dist/replication/JsonBufferWriter.js +342 -0
  9. package/dist/replication/JsonBufferWriter.js.map +1 -0
  10. package/dist/replication/MongoRelation.js +4 -0
  11. package/dist/replication/MongoRelation.js.map +1 -1
  12. package/dist/replication/MongoSnapshotQuery.d.ts +1 -1
  13. package/dist/replication/MongoSnapshotQuery.js +6 -3
  14. package/dist/replication/MongoSnapshotQuery.js.map +1 -1
  15. package/dist/replication/RawChangeStream.d.ts +55 -0
  16. package/dist/replication/RawChangeStream.js +322 -0
  17. package/dist/replication/RawChangeStream.js.map +1 -0
  18. package/dist/replication/SourceRowConverter.d.ts +46 -0
  19. package/dist/replication/SourceRowConverter.js +42 -0
  20. package/dist/replication/SourceRowConverter.js.map +1 -0
  21. package/dist/replication/bufferToSqlite.d.ts +43 -0
  22. package/dist/replication/bufferToSqlite.js +740 -0
  23. package/dist/replication/bufferToSqlite.js.map +1 -0
  24. package/dist/replication/internal-mongodb-utils.d.ts +0 -12
  25. package/dist/replication/internal-mongodb-utils.js +0 -54
  26. package/dist/replication/internal-mongodb-utils.js.map +1 -1
  27. package/dist/replication/replication-index.d.ts +2 -0
  28. package/dist/replication/replication-index.js +2 -0
  29. package/dist/replication/replication-index.js.map +1 -1
  30. package/package.json +11 -11
  31. package/scripts/benchmark-change-document-json.mts +358 -0
  32. package/scripts/benchmark-change-document.mts +370 -0
  33. package/src/replication/ChangeStream.ts +348 -371
  34. package/src/replication/ChangeStreamReplicationJob.ts +2 -2
  35. package/src/replication/JsonBufferWriter.ts +390 -0
  36. package/src/replication/MongoRelation.ts +3 -0
  37. package/src/replication/MongoSnapshotQuery.ts +8 -5
  38. package/src/replication/RawChangeStream.ts +460 -0
  39. package/src/replication/SourceRowConverter.ts +65 -0
  40. package/src/replication/bufferToSqlite.ts +944 -0
  41. package/src/replication/internal-mongodb-utils.ts +0 -65
  42. package/src/replication/replication-index.ts +2 -0
  43. package/test/src/buffer_to_sqlite.test.ts +1146 -0
  44. package/test/src/change_stream.test.ts +49 -2
  45. package/test/src/change_stream_utils.ts +4 -10
  46. package/test/src/mongo_test.test.ts +66 -64
  47. package/test/src/parse_document_id.test.ts +54 -0
  48. package/test/src/raw_change_stream.test.ts +547 -0
  49. package/test/src/resume.test.ts +12 -2
  50. package/test/src/util.ts +56 -3
  51. package/test/tsconfig.json +0 -1
  52. package/tsconfig.scripts.json +13 -0
  53. package/tsconfig.tsbuildinfo +1 -1
  54. package/test/src/internal_mongodb_utils.test.ts +0 -103
@@ -1,14 +1,16 @@
1
- import { isMongoNetworkTimeoutError, isMongoServerError, mongo } from '@powersync/lib-service-mongodb';
2
- import { container, DatabaseConnectionError, logger as defaultLogger, ErrorCode, ReplicationAbortedError, ReplicationAssertionError, ServiceError } from '@powersync/lib-services-framework';
3
- import { RelationCache, ReplicationLagTracker, SaveOperationTag } from '@powersync/service-core';
1
+ import { mongo } from '@powersync/lib-service-mongodb';
2
+ import { container, DatabaseConnectionError, ErrorCode, ReplicationAbortedError, ReplicationAssertionError, ServiceError } from '@powersync/lib-services-framework';
3
+ import { PerformanceTracer, RelationCache, ReplicationLagTracker, SaveOperationTag } from '@powersync/service-core';
4
4
  import { ReplicationMetric } from '@powersync/service-types';
5
+ import { performance } from 'node:perf_hooks';
5
6
  import { MongoLSN } from '../common/MongoLSN.js';
6
7
  import { PostImagesOption } from '../types/types.js';
7
8
  import { escapeRegExp } from '../utils.js';
8
- import { trackChangeStreamBsonBytes } from './internal-mongodb-utils.js';
9
- import { constructAfterRecord, createCheckpoint, getCacheIdentifier, getMongoRelation, STANDALONE_CHECKPOINT_ID } from './MongoRelation.js';
9
+ import { createCheckpoint, getCacheIdentifier, getMongoRelation, STANDALONE_CHECKPOINT_ID } from './MongoRelation.js';
10
10
  import { ChunkedSnapshotQuery } from './MongoSnapshotQuery.js';
11
+ import { parseChangeDocument, rawChangeStream } from './RawChangeStream.js';
11
12
  import { CHECKPOINTS_COLLECTION, timestampToDate } from './replication-utils.js';
13
+ import { DirectSourceRowConverter } from './SourceRowConverter.js';
12
14
  /**
13
15
  * Thrown when the change stream is not valid anymore, and replication
14
16
  * must be restarted.
@@ -39,6 +41,7 @@ export class ChangeStream {
39
41
  logger;
40
42
  snapshotChunkLength;
41
43
  changeStreamTimeout;
44
+ sourceRowConverter;
42
45
  constructor(options) {
43
46
  this.storage = options.storage;
44
47
  this.metrics = options.metrics;
@@ -51,6 +54,7 @@ export class ChangeStream {
51
54
  this.sync_rules = options.storage.getParsedSyncRules({
52
55
  defaultSchema: this.defaultDb.databaseName
53
56
  });
57
+ this.sourceRowConverter = new DirectSourceRowConverter(this.sync_rules.compatibility);
54
58
  // The change stream aggregation command should timeout before the socket times out,
55
59
  // so we use 90% of the socket timeout value.
56
60
  this.changeStreamTimeout = Math.ceil(this.client.options.socketTimeoutMS * 0.9);
@@ -58,7 +62,7 @@ export class ChangeStream {
58
62
  this.abort_signal.addEventListener('abort', () => {
59
63
  // TODO: Fast abort?
60
64
  }, { once: true });
61
- this.logger = options.logger ?? defaultLogger;
65
+ this.logger = options.logger ?? this.storage.logger;
62
66
  }
63
67
  get stopped() {
64
68
  return this.abort_signal.aborted;
@@ -151,65 +155,74 @@ export class ChangeStream {
151
155
  const LSN_CREATE_INTERVAL_SECONDS = 1;
152
156
  // Create a checkpoint, and open a change stream using startAtOperationTime with the checkpoint's operationTime.
153
157
  const firstCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
154
- await using streamManager = this.openChangeStream({ lsn: firstCheckpointLsn, maxAwaitTimeMs: 0 });
155
- const { stream } = streamManager;
156
158
  const startTime = performance.now();
157
159
  let lastCheckpointCreated = performance.now();
158
160
  let eventsSeen = 0;
159
- while (performance.now() - startTime < LSN_TIMEOUT_SECONDS * 1000) {
161
+ let batchesSeen = 0;
162
+ const filters = this.getSourceNamespaceFilters();
163
+ const iter = this.rawChangeStreamBatches({
164
+ lsn: firstCheckpointLsn,
165
+ maxAwaitTimeMS: 0,
166
+ signal: this.abort_signal,
167
+ filters
168
+ });
169
+ for await (let { events } of iter) {
170
+ if (performance.now() - startTime >= LSN_TIMEOUT_SECONDS * 1000) {
171
+ break;
172
+ }
160
173
  if (performance.now() - lastCheckpointCreated >= LSN_CREATE_INTERVAL_SECONDS * 1000) {
161
174
  await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
162
175
  lastCheckpointCreated = performance.now();
163
176
  }
164
- // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream
165
- const changeDocument = await stream.tryNext().catch((e) => {
166
- throw mapChangeStreamError(e);
167
- });
168
- if (changeDocument == null) {
169
- continue;
170
- }
171
- const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
172
- if (ns?.coll == CHECKPOINTS_COLLECTION && 'documentKey' in changeDocument) {
173
- const checkpointId = changeDocument.documentKey._id;
174
- if (!this.checkpointStreamId.equals(checkpointId)) {
175
- continue;
177
+ batchesSeen += 1;
178
+ for (let rawChangeDocument of events) {
179
+ const changeDocument = parseChangeDocument(rawChangeDocument);
180
+ const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
181
+ if (ns?.coll == CHECKPOINTS_COLLECTION && 'documentKey' in changeDocument) {
182
+ const checkpointId = changeDocument.documentKey._id;
183
+ if (!this.checkpointStreamId.equals(checkpointId)) {
184
+ continue;
185
+ }
186
+ const { comparable: lsn } = new MongoLSN({
187
+ timestamp: changeDocument.clusterTime,
188
+ resume_token: changeDocument._id
189
+ });
190
+ return lsn;
176
191
  }
177
- const { comparable: lsn } = new MongoLSN({
178
- timestamp: changeDocument.clusterTime,
179
- resume_token: changeDocument._id
180
- });
181
- return lsn;
192
+ eventsSeen += 1;
182
193
  }
183
- eventsSeen += 1;
184
194
  }
185
195
  // Could happen if there is a very large replication lag?
186
- throw new ServiceError(ErrorCode.PSYNC_S1301, `Timeout after while waiting for checkpoint document for ${LSN_TIMEOUT_SECONDS}s. Streamed events = ${eventsSeen}`);
196
+ throw new ServiceError(ErrorCode.PSYNC_S1301, `Timeout after while waiting for checkpoint document for ${LSN_TIMEOUT_SECONDS}s. Streamed events = ${eventsSeen}, batches = ${batchesSeen}`);
187
197
  }
188
198
  /**
189
199
  * Given a snapshot LSN, validate that we can read from it, by opening a change stream.
190
200
  */
191
201
  async validateSnapshotLsn(lsn) {
192
- await using streamManager = this.openChangeStream({ lsn: lsn, maxAwaitTimeMs: 0 });
193
- const { stream } = streamManager;
194
- try {
195
- // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream
196
- await stream.tryNext();
197
- }
198
- catch (e) {
199
- // Note: A timeout here is not handled as a ChangeStreamInvalidatedError, even though
200
- // we possibly cannot recover from it.
201
- throw mapChangeStreamError(e);
202
+ const filters = this.getSourceNamespaceFilters();
203
+ const stream = this.rawChangeStreamBatches({
204
+ lsn: lsn,
205
+ // maxAwaitTimeMS should never actually be used here
206
+ maxAwaitTimeMS: 0,
207
+ filters
208
+ });
209
+ for await (let _batch of stream) {
210
+ // We got a response from the aggregate command, so consider the LSN valid.
211
+ // Close the stream immediately.
212
+ break;
202
213
  }
203
214
  }
204
215
  async initialReplication(snapshotLsn) {
205
216
  const sourceTables = this.sync_rules.getSourceTables();
206
217
  await this.client.connect();
218
+ const tracer = new PerformanceTracer('MongoDB initial replication');
207
219
  const flushResult = await this.storage.startBatch({
208
220
  logger: this.logger,
209
221
  zeroLSN: MongoLSN.ZERO.comparable,
210
222
  defaultSchema: this.defaultDb.databaseName,
211
223
  storeCurrentData: false,
212
- skipExistingRows: true
224
+ skipExistingRows: true,
225
+ tracer
213
226
  }, async (batch) => {
214
227
  if (snapshotLsn == null) {
215
228
  // First replication attempt - get a snapshot and store the timestamp
@@ -330,11 +343,6 @@ export class ChangeStream {
330
343
  }
331
344
  return { $match: nsFilter, multipleDatabases };
332
345
  }
333
- static *getQueryData(results) {
334
- for (let row of results) {
335
- yield constructAfterRecord(row);
336
- }
337
- }
338
346
  async snapshotTable(batch, table) {
339
347
  const rowsReplicatedMetric = this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED);
340
348
  const bytesReplicatedMetric = this.metrics.getCounter(ReplicationMetric.DATA_REPLICATED_BYTES);
@@ -369,8 +377,8 @@ export class ChangeStream {
369
377
  }
370
378
  // Pre-fetch next batch, so that we can read and write concurrently
371
379
  nextChunkPromise = query.nextChunk();
372
- for (let document of docBatch) {
373
- const record = this.constructAfterRecord(document);
380
+ for (let buffer of docBatch) {
381
+ const { row: record, replicaId: replicaId } = this.rawToSqliteRow(buffer);
374
382
  // This auto-flushes when the batch reaches its size limit
375
383
  await batch.save({
376
384
  tag: SaveOperationTag.INSERT,
@@ -378,7 +386,7 @@ export class ChangeStream {
378
386
  before: undefined,
379
387
  beforeReplicaId: undefined,
380
388
  after: record,
381
- afterReplicaId: document._id
389
+ afterReplicaId: replicaId
382
390
  });
383
391
  }
384
392
  // Important: flush before marking progress
@@ -461,7 +469,7 @@ export class ChangeStream {
461
469
  // Snapshot if:
462
470
  // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere)
463
471
  // 2. Snapshot is not already done, AND:
464
- // 3. The table is used in sync rules.
472
+ // 3. The table is used in sync config.
465
473
  const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny;
466
474
  if (shouldSnapshot) {
467
475
  this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`);
@@ -474,24 +482,23 @@ export class ChangeStream {
474
482
  }
475
483
  return result.table;
476
484
  }
477
- constructAfterRecord(document) {
478
- const inputRow = constructAfterRecord(document);
479
- return this.sync_rules.applyRowContext(inputRow);
480
- }
481
485
  async writeChange(batch, table, change) {
482
486
  if (!table.syncAny) {
483
- this.logger.debug(`Collection ${table.qualifiedName} not used in sync rules - skipping`);
487
+ this.logger.debug(`Collection ${table.qualifiedName} not used in sync config - skipping`);
484
488
  return null;
485
489
  }
486
490
  this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1);
487
491
  if (change.operationType == 'insert') {
488
- const baseRecord = this.constructAfterRecord(change.fullDocument);
492
+ const { row: baseRecord, replicaId: _replicaId } = this.rawToSqliteRow(change.fullDocument);
489
493
  return await batch.save({
490
494
  tag: SaveOperationTag.INSERT,
491
495
  sourceTable: table,
492
496
  before: undefined,
493
497
  beforeReplicaId: undefined,
494
498
  after: baseRecord,
499
+ // Same as _replicaId
500
+ // We specifically need to use the source _id, not the converted one in baseRecord,
501
+ // to preserve _id uniqueness properties.
495
502
  afterReplicaId: change.documentKey._id
496
503
  });
497
504
  }
@@ -505,14 +512,14 @@ export class ChangeStream {
505
512
  beforeReplicaId: change.documentKey._id
506
513
  });
507
514
  }
508
- const after = this.constructAfterRecord(change.fullDocument);
515
+ const { row: after, replicaId: _replicaId } = this.rawToSqliteRow(change.fullDocument);
509
516
  return await batch.save({
510
517
  tag: SaveOperationTag.UPDATE,
511
518
  sourceTable: table,
512
519
  before: undefined,
513
520
  beforeReplicaId: undefined,
514
521
  after: after,
515
- afterReplicaId: change.documentKey._id
522
+ afterReplicaId: change.documentKey._id // Same as _replicaId
516
523
  });
517
524
  }
518
525
  else if (change.operationType == 'delete') {
@@ -549,7 +556,7 @@ export class ChangeStream {
549
556
  }
550
557
  const { lastOpId } = await this.initialReplication(result.snapshotLsn);
551
558
  if (lastOpId != null) {
552
- // Populate the cache _after_ initial replication, but _before_ we switch to this sync rules.
559
+ // Populate the cache _after_ initial replication, but _before_ we switch to this replication stream.
553
560
  await this.storage.populatePersistentChecksumCache({
554
561
  signal: this.abort_signal,
555
562
  // No checkpoint yet, but we do have the opId.
@@ -571,17 +578,11 @@ export class ChangeStream {
571
578
  throw e;
572
579
  }
573
580
  }
574
- openChangeStream(options) {
581
+ rawChangeStreamBatches(options) {
575
582
  const lastLsn = options.lsn ? MongoLSN.fromSerialized(options.lsn) : null;
576
583
  const startAfter = lastLsn?.timestamp;
577
584
  const resumeAfter = lastLsn?.resumeToken;
578
- const filters = this.getSourceNamespaceFilters();
579
- const pipeline = [
580
- {
581
- $match: filters.$match
582
- },
583
- { $changeStreamSplitLargeEvent: {} }
584
- ];
585
+ const filters = options.filters;
585
586
  let fullDocument;
586
587
  if (this.usePostImages) {
587
588
  // 'read_only' or 'auto_configure'
@@ -594,10 +595,17 @@ export class ChangeStream {
594
595
  }
595
596
  const streamOptions = {
596
597
  showExpandedEvents: true,
597
- maxAwaitTimeMS: options.maxAwaitTimeMs ?? this.maxAwaitTimeMS,
598
- fullDocument: fullDocument,
599
- maxTimeMS: this.changeStreamTimeout
598
+ fullDocument: fullDocument
600
599
  };
600
+ const pipeline = [
601
+ {
602
+ $changeStream: streamOptions
603
+ },
604
+ {
605
+ $match: filters.$match
606
+ },
607
+ { $changeStreamSplitLargeEvent: {} }
608
+ ];
601
609
  /**
602
610
  * Only one of these options can be supplied at a time.
603
611
  */
@@ -610,48 +618,38 @@ export class ChangeStream {
610
618
  // This is also relevant for getSnapshotLSN().
611
619
  streamOptions.startAtOperationTime = startAfter;
612
620
  }
613
- let stream;
621
+ let watchDb;
614
622
  if (filters.multipleDatabases) {
615
- // Requires readAnyDatabase@admin on Atlas
616
- stream = this.client.watch(pipeline, streamOptions);
623
+ watchDb = this.client.db('admin');
624
+ streamOptions.allChangesForCluster = true;
617
625
  }
618
626
  else {
619
- // Same general result, but requires less permissions than the above
620
- stream = this.defaultDb.watch(pipeline, streamOptions);
627
+ watchDb = this.defaultDb;
621
628
  }
622
- this.abort_signal.addEventListener('abort', () => {
623
- stream.close();
629
+ return rawChangeStream(watchDb, pipeline, {
630
+ batchSize: options.batchSize ?? this.snapshotChunkLength,
631
+ maxAwaitTimeMS: options.maxAwaitTimeMS ?? this.maxAwaitTimeMS,
632
+ maxTimeMS: this.changeStreamTimeout,
633
+ signal: options.signal,
634
+ logger: this.logger,
635
+ tracer: options.tracer
624
636
  });
625
- return {
626
- stream,
627
- filters,
628
- [Symbol.asyncDispose]: async () => {
629
- return stream.close();
630
- }
631
- };
632
637
  }
633
- getBufferedChangeCount(stream) {
634
- // The driver keeps fetched change stream documents on the underlying cursor, but does
635
- // not expose that through the public ChangeStream API. We use this to detect backlog
636
- // building up before we have processed the corresponding source changes locally.
637
- // If the driver API changes, we'll have a hard error here.
638
- // We specifically want to avoid a silent performance regression if the driver behavior changes.
639
- const cursor = stream.cursor;
640
- if (cursor == null || typeof cursor.bufferedCount != 'function') {
641
- throw new ReplicationAssertionError('MongoDB ChangeStream no longer exposes an internal cursor with bufferedCount');
642
- }
643
- return cursor.bufferedCount();
638
+ rawToSqliteRow(row) {
639
+ return this.sourceRowConverter.rawToSqliteRow(row);
644
640
  }
645
641
  async streamChangesInternal() {
646
642
  const transactionsReplicatedMetric = this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED);
647
643
  const bytesReplicatedMetric = this.metrics.getCounter(ReplicationMetric.DATA_REPLICATED_BYTES);
648
644
  const chunksReplicatedMetric = this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED);
645
+ const tracer = new PerformanceTracer('MongoDB streaming replication');
649
646
  await this.storage.startBatch({
650
647
  logger: this.logger,
651
648
  zeroLSN: MongoLSN.ZERO.comparable,
652
649
  defaultSchema: this.defaultDb.databaseName,
653
650
  // We get a complete postimage for every change, so we don't need to store the current data.
654
- storeCurrentData: false
651
+ storeCurrentData: false,
652
+ tracer
655
653
  }, async (batch) => {
656
654
  const { resumeFromLsn } = batch;
657
655
  if (resumeFromLsn == null) {
@@ -659,20 +657,18 @@ export class ChangeStream {
659
657
  }
660
658
  const lastLsn = MongoLSN.fromSerialized(resumeFromLsn);
661
659
  const startAfter = lastLsn?.timestamp;
660
+ let outerSpan = tracer.span('batch');
662
661
  // It is normal for this to be a minute or two old when there is a low volume
663
662
  // of ChangeStream events.
664
663
  const tokenAgeSeconds = Math.round((Date.now() - timestampToDate(startAfter).getTime()) / 1000);
665
664
  this.logger.info(`Resume streaming at ${startAfter?.inspect()} / ${lastLsn} | Token age: ${tokenAgeSeconds}s`);
666
- await using streamManager = this.openChangeStream({ lsn: resumeFromLsn });
667
- const { stream, filters } = streamManager;
668
- if (this.abort_signal.aborted) {
669
- await stream.close();
670
- return;
671
- }
672
- trackChangeStreamBsonBytes(stream, (bytes) => {
673
- bytesReplicatedMetric.add(bytes);
674
- // Each of these represent a single response message from MongoDB.
675
- chunksReplicatedMetric.add(1);
665
+ const filters = this.getSourceNamespaceFilters();
666
+ // This is closed when the for loop below returns/breaks/throws
667
+ const batchStream = this.rawChangeStreamBatches({
668
+ lsn: resumeFromLsn,
669
+ filters,
670
+ signal: this.abort_signal,
671
+ tracer
676
672
  });
677
673
  // Always start with a checkpoint.
678
674
  // This helps us to clear errors when restarting, even if there is
@@ -680,36 +676,24 @@ export class ChangeStream {
680
676
  let waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
681
677
  let splitDocument = null;
682
678
  let flexDbNameWorkaroundLogged = false;
683
- let changesSinceLastCheckpoint = 0;
684
679
  let lastEmptyResume = performance.now();
685
680
  let lastTxnKey = null;
686
- while (true) {
687
- if (this.abort_signal.aborted) {
688
- break;
689
- }
690
- const originalChangeDocument = await stream.tryNext().catch((e) => {
691
- throw mapChangeStreamError(e);
692
- });
693
- // The stream was closed, we will only ever receive `null` from it
694
- if (!originalChangeDocument && stream.closed) {
695
- break;
696
- }
681
+ for await (let eventBatch of batchStream) {
682
+ const { events, resumeToken } = eventBatch;
683
+ using batchSpan = tracer.span('processing');
684
+ bytesReplicatedMetric.add(eventBatch.byteSize);
685
+ chunksReplicatedMetric.add(1);
697
686
  if (this.abort_signal.aborted) {
698
687
  break;
699
688
  }
700
- if (originalChangeDocument == null) {
701
- // We get a new null document after `maxAwaitTimeMS` if there were no other events.
702
- // In this case, stream.resumeToken is the resume token associated with the last response.
703
- // stream.resumeToken is not updated if stream.tryNext() returns data, while stream.next()
704
- // does update it.
705
- // From observed behavior, the actual resumeToken changes around once every 10 seconds.
689
+ this.touch();
690
+ if (events.length == 0) {
691
+ // No changes in this batch, but we still want to keep the connection alive.
692
+ // We do this by persisting a keepalive checkpoint.
706
693
  // If we don't update it on empty events, we do keep consistency, but resuming the stream
707
694
  // with old tokens may cause connection timeouts.
708
- // We throttle this further by only persisting a keepalive once a minute.
709
- // We add an additional check for waitForCheckpointLsn == null, to make sure we're not
710
- // doing a keepalive in the middle of a transaction.
711
695
  if (waitForCheckpointLsn == null && performance.now() - lastEmptyResume > 60_000) {
712
- const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(stream.resumeToken);
696
+ const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(resumeToken);
713
697
  await batch.keepalive(lsn);
714
698
  this.touch();
715
699
  lastEmptyResume = performance.now();
@@ -718,197 +702,214 @@ export class ChangeStream {
718
702
  this.logger.info(`Idle change stream. Persisted resumeToken for ${timestampToDate(timestamp).toISOString()}`);
719
703
  this.replicationLag.markStarted();
720
704
  }
721
- continue;
722
- }
723
- this.touch();
724
- if (startAfter != null && originalChangeDocument.clusterTime?.lte(startAfter)) {
725
- continue;
726
- }
727
- let changeDocument = originalChangeDocument;
728
- if (originalChangeDocument?.splitEvent != null) {
729
- // Handle split events from $changeStreamSplitLargeEvent.
730
- // This is only relevant for very large update operations.
731
- const splitEvent = originalChangeDocument?.splitEvent;
732
- if (splitDocument == null) {
733
- splitDocument = originalChangeDocument;
734
- }
735
- else {
736
- splitDocument = Object.assign(splitDocument, originalChangeDocument);
737
- }
738
- if (splitEvent.fragment == splitEvent.of) {
739
- // Got all fragments
740
- changeDocument = splitDocument;
741
- splitDocument = null;
742
- }
743
- else {
744
- // Wait for more fragments
705
+ // If we have no changes, we can just persist the keepalive.
706
+ // This is throttled to once per minute.
707
+ if (performance.now() - lastEmptyResume < 60_000) {
745
708
  continue;
746
709
  }
747
710
  }
748
- else if (splitDocument != null) {
749
- // We were waiting for fragments, but got a different event
750
- throw new ReplicationAssertionError(`Incomplete splitEvent: ${JSON.stringify(splitDocument.splitEvent)}`);
751
- }
752
- if (!filters.multipleDatabases &&
753
- 'ns' in changeDocument &&
754
- changeDocument.ns.db != this.defaultDb.databaseName &&
755
- changeDocument.ns.db.endsWith(`_${this.defaultDb.databaseName}`)) {
756
- // When all of the following conditions are met:
757
- // 1. We're replicating from an Atlas Flex instance.
758
- // 2. There were changestream events recorded while the PowerSync service is paused.
759
- // 3. We're only replicating from a single database.
760
- // Then we've observed an ns with for example {db: '67b83e86cd20730f1e766dde_ps'},
761
- // instead of the expected {db: 'ps'}.
762
- // We correct this.
763
- changeDocument.ns.db = this.defaultDb.databaseName;
764
- if (!flexDbNameWorkaroundLogged) {
765
- flexDbNameWorkaroundLogged = true;
766
- this.logger.warn(`Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.`);
767
- }
768
- }
769
- const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
770
- if (ns?.coll == CHECKPOINTS_COLLECTION) {
771
- /**
772
- * Dropping the database does not provide an `invalidate` event.
773
- * We typically would receive `drop` events for the collection which we
774
- * would process below.
775
- *
776
- * However we don't commit the LSN after collections are dropped.
777
- * The prevents the `startAfter` or `resumeToken` from advancing past the drop events.
778
- * The stream also closes after the drop events.
779
- * This causes an infinite loop of processing the collection drop events.
780
- *
781
- * This check here invalidates the change stream if our `_checkpoints` collection
782
- * is dropped. This allows for detecting when the DB is dropped.
783
- */
784
- if (changeDocument.operationType == 'drop') {
785
- throw new ChangeStreamInvalidatedError('Internal collections have been dropped', new Error('_checkpoints collection was dropped'));
711
+ this.touch();
712
+ for (let eventIndex = 0; eventIndex < events.length; eventIndex++) {
713
+ const rawChangeDocument = events[eventIndex];
714
+ const originalChangeDocument = parseChangeDocument(rawChangeDocument);
715
+ if (this.abort_signal.aborted) {
716
+ break;
786
717
  }
787
- if (!(changeDocument.operationType == 'insert' ||
788
- changeDocument.operationType == 'update' ||
789
- changeDocument.operationType == 'replace')) {
718
+ if (startAfter != null && originalChangeDocument.clusterTime?.lte(startAfter)) {
790
719
  continue;
791
720
  }
792
- // We handle two types of checkpoint events:
793
- // 1. "Standalone" checkpoints, typically write checkpoints. We want to process these
794
- // immediately, regardless of where they were created.
795
- // 2. "Batch" checkpoints for the current stream. This is used as a form of dynamic rate
796
- // limiting of commits, so we specifically want to exclude checkpoints from other streams.
797
- //
798
- // It may be useful to also throttle commits due to standalone checkpoints in the future.
799
- // However, these typically have a much lower rate than batch checkpoints, so we don't do that for now.
800
- const checkpointId = changeDocument.documentKey._id;
801
- if (checkpointId == STANDALONE_CHECKPOINT_ID) {
802
- // Standalone / write checkpoint received.
803
- // When we are caught up, commit immediately to keep write checkpoint latency low.
804
- // Once there is already a batch checkpoint pending, or the driver has buffered more
805
- // change stream events, collapse standalone checkpoints into the normal batch
806
- // checkpoint flow to avoid commit churn under sustained load.
807
- if (waitForCheckpointLsn != null || this.getBufferedChangeCount(stream) > 0) {
808
- if (waitForCheckpointLsn == null) {
809
- waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
810
- }
721
+ let changeDocument = originalChangeDocument;
722
+ if (originalChangeDocument?.splitEvent != null) {
723
+ // Handle split events from $changeStreamSplitLargeEvent.
724
+ // This is only relevant for very large update operations.
725
+ const splitEvent = originalChangeDocument?.splitEvent;
726
+ if (splitDocument == null) {
727
+ splitDocument = originalChangeDocument;
728
+ }
729
+ else {
730
+ splitDocument = Object.assign(splitDocument, originalChangeDocument);
731
+ }
732
+ if (splitEvent.fragment == splitEvent.of) {
733
+ // Got all fragments
734
+ changeDocument = splitDocument;
735
+ splitDocument = null;
736
+ }
737
+ else {
738
+ // Wait for more fragments
811
739
  continue;
812
740
  }
813
741
  }
814
- else if (!this.checkpointStreamId.equals(checkpointId)) {
815
- continue;
816
- }
817
- const { comparable: lsn } = new MongoLSN({
818
- timestamp: changeDocument.clusterTime,
819
- resume_token: changeDocument._id
820
- });
821
- if (batch.lastCheckpointLsn != null && lsn < batch.lastCheckpointLsn) {
822
- // Checkpoint out of order - should never happen with MongoDB.
823
- // If it does happen, we throw an error to stop the replication - restarting should recover.
824
- // Since we use batch.lastCheckpointLsn for the next resumeAfter, this should not result in an infinite loop.
825
- // Originally a workaround for https://jira.mongodb.org/browse/NODE-7042.
826
- // This has been fixed in the driver in the meantime, but we still keep this as a safety-check.
827
- throw new ReplicationAssertionError(`Change resumeToken ${changeDocument._id._data} (${timestampToDate(changeDocument.clusterTime).toISOString()}) is less than last checkpoint LSN ${batch.lastCheckpointLsn}. Restarting replication.`);
828
- }
829
- if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) {
830
- waitForCheckpointLsn = null;
742
+ else if (splitDocument != null) {
743
+ // We were waiting for fragments, but got a different event
744
+ throw new ReplicationAssertionError(`Incomplete splitEvent: ${JSON.stringify(splitDocument.splitEvent)}`);
831
745
  }
832
- const { checkpointBlocked } = await batch.commit(lsn, {
833
- oldestUncommittedChange: this.replicationLag.oldestUncommittedChange
834
- });
835
- if (!checkpointBlocked) {
836
- this.replicationLag.markCommitted();
837
- changesSinceLastCheckpoint = 0;
746
+ if (!filters.multipleDatabases &&
747
+ 'ns' in changeDocument &&
748
+ changeDocument.ns.db != this.defaultDb.databaseName &&
749
+ changeDocument.ns.db.endsWith(`_${this.defaultDb.databaseName}`)) {
750
+ // When all of the following conditions are met:
751
+ // 1. We're replicating from an Atlas Flex instance.
752
+ // 2. There were changestream events recorded while the PowerSync service is paused.
753
+ // 3. We're only replicating from a single database.
754
+ // Then we've observed an ns with for example {db: '67b83e86cd20730f1e766dde_ps'},
755
+ // instead of the expected {db: 'ps'}.
756
+ // We correct this.
757
+ changeDocument.ns.db = this.defaultDb.databaseName;
758
+ if (!flexDbNameWorkaroundLogged) {
759
+ flexDbNameWorkaroundLogged = true;
760
+ this.logger.warn(`Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.`);
761
+ }
838
762
  }
839
- }
840
- else if (changeDocument.operationType == 'insert' ||
841
- changeDocument.operationType == 'update' ||
842
- changeDocument.operationType == 'replace' ||
843
- changeDocument.operationType == 'delete') {
844
- if (waitForCheckpointLsn == null) {
845
- waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
763
+ const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
764
+ if (ns?.coll == CHECKPOINTS_COLLECTION) {
765
+ /**
766
+ * Dropping the database does not provide an `invalidate` event.
767
+ * We typically would receive `drop` events for the collection which we
768
+ * would process below.
769
+ *
770
+ * However we don't commit the LSN after collections are dropped.
771
+ * This prevents the `startAfter` or `resumeToken` from advancing past the drop events.
772
+ * The stream also closes after the drop events.
773
+ * This causes an infinite loop of processing the collection drop events.
774
+ *
775
+ * This check here invalidates the change stream if our `_powersync_checkpoints` collection
776
+ * is dropped. This allows for detecting when the DB is dropped.
777
+ */
778
+ if (changeDocument.operationType == 'drop') {
779
+ throw new ChangeStreamInvalidatedError('Internal collections have been dropped', new Error('_powersync_checkpoints collection was dropped'));
780
+ }
781
+ if (!(changeDocument.operationType == 'insert' ||
782
+ changeDocument.operationType == 'update' ||
783
+ changeDocument.operationType == 'replace')) {
784
+ continue;
785
+ }
786
+ // We handle two types of checkpoint events:
787
+ // 1. "Standalone" checkpoints, typically write checkpoints. We want to process these
788
+ // immediately, regardless of where they were created.
789
+ // 2. "Batch" checkpoints for the current stream. This is used as a form of dynamic rate
790
+ // limiting of commits, so we specifically want to exclude checkpoints from other streams.
791
+ //
792
+ // It may be useful to also throttle commits due to standalone checkpoints in the future.
793
+ // However, these typically have a much lower rate than batch checkpoints, so we don't do that for now.
794
+ const checkpointId = changeDocument.documentKey._id;
795
+ if (checkpointId == STANDALONE_CHECKPOINT_ID) {
796
+ // Standalone / write checkpoint received.
797
+ // When we are caught up, commit immediately to keep write checkpoint latency low.
798
+ // Once there is already a batch checkpoint pending, or the driver has buffered more
799
+ // change stream events, collapse standalone checkpoints into the normal batch
800
+ // checkpoint flow to avoid commit churn under sustained load.
801
+ const hasBufferedChanges = eventIndex < events.length - 1;
802
+ if (waitForCheckpointLsn != null || hasBufferedChanges) {
803
+ if (waitForCheckpointLsn == null) {
804
+ waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
805
+ }
806
+ continue;
807
+ }
808
+ }
809
+ else if (!this.checkpointStreamId.equals(checkpointId)) {
810
+ continue;
811
+ }
812
+ const { comparable: lsn } = new MongoLSN({
813
+ timestamp: changeDocument.clusterTime,
814
+ resume_token: changeDocument._id
815
+ });
816
+ if (batch.lastCheckpointLsn != null && lsn < batch.lastCheckpointLsn) {
817
+ // Checkpoint out of order - should never happen with MongoDB.
818
+ // If it does happen, we throw an error to stop the replication - restarting should recover.
819
+ // Since we use batch.lastCheckpointLsn for the next resumeAfter, this should not result in an infinite loop.
820
+ // Originally a workaround for https://jira.mongodb.org/browse/NODE-7042.
821
+ // This has been fixed in the driver in the meantime, but we still keep this as a safety-check.
822
+ throw new ReplicationAssertionError(`Change resumeToken ${changeDocument._id._data} (${timestampToDate(changeDocument.clusterTime).toISOString()}) is less than last checkpoint LSN ${batch.lastCheckpointLsn}. Restarting replication.`);
823
+ }
824
+ if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) {
825
+ waitForCheckpointLsn = null;
826
+ }
827
+ const { checkpointBlocked } = await batch.commit(lsn, {
828
+ oldestUncommittedChange: this.replicationLag.oldestUncommittedChange
829
+ });
830
+ if (!checkpointBlocked) {
831
+ this.replicationLag.markCommitted();
832
+ }
846
833
  }
847
- const rel = getMongoRelation(changeDocument.ns);
848
- const table = await this.getRelation(batch, rel, {
849
- // In most cases, we should not need to snapshot this. But if this is the first time we see the collection
850
- // for whatever reason, then we do need to snapshot it.
851
- // This may result in some duplicate operations when a collection is created for the first time after
852
- // sync rules was deployed.
853
- snapshot: true
854
- });
855
- if (table.syncAny) {
856
- this.replicationLag.trackUncommittedChange(changeDocument.clusterTime == null ? null : timestampToDate(changeDocument.clusterTime));
857
- const transactionKeyValue = transactionKey(changeDocument);
858
- if (transactionKeyValue == null || lastTxnKey != transactionKeyValue) {
859
- // Very crude metric for counting transactions replicated.
860
- // We ignore operations other than basic CRUD, and ignore changes to _powersync_checkpoints.
861
- // Individual writes may not have a txnNumber, in which case we count them as separate transactions.
862
- lastTxnKey = transactionKeyValue;
863
- transactionsReplicatedMetric.add(1);
834
+ else if (changeDocument.operationType == 'insert' ||
835
+ changeDocument.operationType == 'update' ||
836
+ changeDocument.operationType == 'replace' ||
837
+ changeDocument.operationType == 'delete') {
838
+ if (waitForCheckpointLsn == null) {
839
+ waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
864
840
  }
865
- const flushResult = await this.writeChange(batch, table, changeDocument);
866
- changesSinceLastCheckpoint += 1;
867
- if (flushResult != null && changesSinceLastCheckpoint >= 20_000) {
868
- // When we are catching up replication after an initial snapshot, there may be a very long delay
869
- // before we do a commit(). In that case, we need to periodically persist the resume LSN, so
870
- // we don't restart from scratch if we restart replication.
871
- // The same could apply if we need to catch up on replication after some downtime.
872
- const { comparable: lsn } = new MongoLSN({
873
- timestamp: changeDocument.clusterTime,
874
- resume_token: changeDocument._id
875
- });
876
- this.logger.info(`Updating resume LSN to ${lsn} after ${changesSinceLastCheckpoint} changes`);
877
- await batch.setResumeLsn(lsn);
878
- changesSinceLastCheckpoint = 0;
841
+ const rel = getMongoRelation(changeDocument.ns);
842
+ const table = await this.getRelation(batch, rel, {
843
+ // In most cases, we should not need to snapshot this. But if this is the first time we see the collection
844
+ // for whatever reason, then we do need to snapshot it.
845
+ // This may result in some duplicate operations when a collection is created for the first time after
846
+ // sync config was deployed.
847
+ snapshot: true
848
+ });
849
+ if (table.syncAny) {
850
+ this.replicationLag.trackUncommittedChange(changeDocument.clusterTime == null ? null : timestampToDate(changeDocument.clusterTime));
851
+ const transactionKeyValue = transactionKey(changeDocument);
852
+ if (transactionKeyValue == null || lastTxnKey != transactionKeyValue) {
853
+ // Very crude metric for counting transactions replicated.
854
+ // We ignore operations other than basic CRUD, and ignore changes to _powersync_checkpoints.
855
+ // Individual writes may not have a txnNumber, in which case we count them as separate transactions.
856
+ lastTxnKey = transactionKeyValue;
857
+ transactionsReplicatedMetric.add(1);
858
+ }
859
+ await this.writeChange(batch, table, changeDocument);
879
860
  }
880
861
  }
881
- }
882
- else if (changeDocument.operationType == 'drop') {
883
- const rel = getMongoRelation(changeDocument.ns);
884
- const table = await this.getRelation(batch, rel, {
885
- // We're "dropping" this collection, so never snapshot it.
886
- snapshot: false
887
- });
888
- if (table.syncAny) {
889
- await batch.drop([table]);
890
- this.relationCache.delete(table);
862
+ else if (changeDocument.operationType == 'drop') {
863
+ const rel = getMongoRelation(changeDocument.ns);
864
+ const table = await this.getRelation(batch, rel, {
865
+ // We're "dropping" this collection, so never snapshot it.
866
+ snapshot: false
867
+ });
868
+ if (table.syncAny) {
869
+ await batch.drop([table]);
870
+ this.relationCache.delete(table);
871
+ }
891
872
  }
892
- }
893
- else if (changeDocument.operationType == 'rename') {
894
- const relFrom = getMongoRelation(changeDocument.ns);
895
- const relTo = getMongoRelation(changeDocument.to);
896
- const tableFrom = await this.getRelation(batch, relFrom, {
897
- // We're "dropping" this collection, so never snapshot it.
898
- snapshot: false
899
- });
900
- if (tableFrom.syncAny) {
901
- await batch.drop([tableFrom]);
902
- this.relationCache.delete(relFrom);
873
+ else if (changeDocument.operationType == 'rename') {
874
+ const relFrom = getMongoRelation(changeDocument.ns);
875
+ const relTo = getMongoRelation(changeDocument.to);
876
+ const tableFrom = await this.getRelation(batch, relFrom, {
877
+ // We're "dropping" this collection, so never snapshot it.
878
+ snapshot: false
879
+ });
880
+ if (tableFrom.syncAny) {
881
+ await batch.drop([tableFrom]);
882
+ this.relationCache.delete(relFrom);
883
+ }
884
+ // Here we do need to snapshot the new table
885
+ const collection = await this.getCollectionInfo(relTo.schema, relTo.name);
886
+ await this.handleRelation(batch, relTo, {
887
+ // This is a new (renamed) collection, so always snapshot it.
888
+ snapshot: true,
889
+ collectionInfo: collection
890
+ });
903
891
  }
904
- // Here we do need to snapshot the new table
905
- const collection = await this.getCollectionInfo(relTo.schema, relTo.name);
906
- await this.handleRelation(batch, relTo, {
907
- // This is a new (renamed) collection, so always snapshot it.
908
- snapshot: true,
909
- collectionInfo: collection
910
- });
911
892
  }
893
+ if (splitDocument == null) {
894
+ // We flush and mark progress on every batch of data we receive.
895
+ // Batches are generally large (64MB or 6000 events, whichever comes first),
896
+ // so this is a good natural point to flush and mark progress.
897
+ // We avoid this when splitDocument is set, since we cannot resume in the middle of a split event.
898
+ const { comparable: lsn } = MongoLSN.fromResumeToken(resumeToken);
899
+ await batch.flush({ oldestUncommittedChange: this.replicationLag.oldestUncommittedChange });
900
+ // TODO: We should consider making this standard behavior of flush().
901
+ await batch.setResumeLsn(lsn);
902
+ }
903
+ batchSpan.end();
904
+ const durations = outerSpan.end();
905
+ const duration = batchSpan.endAt - batchSpan.startAt;
906
+ this.logger.info(`Processed batch of ${events.length} changes / ${eventBatch.byteSize} bytes in ${duration}ms`, {
907
+ count: events.length,
908
+ bytes: eventBatch.byteSize,
909
+ duration,
910
+ t: durations
911
+ });
912
+ outerSpan = tracer.span('batch');
912
913
  }
913
914
  });
914
915
  }
@@ -926,29 +927,6 @@ export class ChangeStream {
926
927
  }
927
928
  }
928
929
  }
929
- function mapChangeStreamError(e) {
930
- if (isMongoNetworkTimeoutError(e)) {
931
- // This typically has an unhelpful message like "connection 2 to 159.41.94.47:27017 timed out".
932
- // We wrap the error to make it more useful.
933
- throw new DatabaseConnectionError(ErrorCode.PSYNC_S1345, `Timeout while reading MongoDB ChangeStream`, e);
934
- }
935
- else if (isMongoServerError(e) && e.codeName == 'MaxTimeMSExpired') {
936
- // maxTimeMS was reached. Example message:
937
- // MongoServerError: Executor error during aggregate command on namespace: powersync_test_data.$cmd.aggregate :: caused by :: operation exceeded time limit
938
- throw new DatabaseConnectionError(ErrorCode.PSYNC_S1345, `Timeout while reading MongoDB ChangeStream`, e);
939
- }
940
- else if (isMongoServerError(e) &&
941
- e.codeName == 'NoMatchingDocument' &&
942
- e.errmsg?.includes('post-image was not found')) {
943
- throw new ChangeStreamInvalidatedError(e.errmsg, e);
944
- }
945
- else if (isMongoServerError(e) && e.hasErrorLabel('NonResumableChangeStreamError')) {
946
- throw new ChangeStreamInvalidatedError(e.message, e);
947
- }
948
- else {
949
- throw new DatabaseConnectionError(ErrorCode.PSYNC_S1346, `Error reading MongoDB ChangeStream`, e);
950
- }
951
- }
952
930
  /**
953
931
  * Transaction key for a change stream event, used to detect transaction boundaries. Returns null if the event is not part of a transaction.
954
932
  */