@powersync/service-module-mongodb 0.15.1 → 0.15.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/CHANGELOG.md +22 -0
  2. package/dist/replication/ChangeStream.d.ts +4 -12
  3. package/dist/replication/ChangeStream.js +378 -443
  4. package/dist/replication/ChangeStream.js.map +1 -1
  5. package/dist/replication/ChangeStreamReplicationJob.d.ts +1 -1
  6. package/dist/replication/ChangeStreamReplicationJob.js +1 -1
  7. package/dist/replication/ChangeStreamReplicationJob.js.map +1 -1
  8. package/dist/replication/ChangeStreamReplicator.d.ts +0 -1
  9. package/dist/replication/ChangeStreamReplicator.js +0 -21
  10. package/dist/replication/ChangeStreamReplicator.js.map +1 -1
  11. package/dist/replication/MongoSnapshotQuery.d.ts +2 -0
  12. package/dist/replication/MongoSnapshotQuery.js +4 -2
  13. package/dist/replication/MongoSnapshotQuery.js.map +1 -1
  14. package/dist/replication/internal-mongodb-utils.d.ts +21 -0
  15. package/dist/replication/internal-mongodb-utils.js +70 -0
  16. package/dist/replication/internal-mongodb-utils.js.map +1 -0
  17. package/dist/replication/replication-index.d.ts +1 -0
  18. package/dist/replication/replication-index.js +1 -0
  19. package/dist/replication/replication-index.js.map +1 -1
  20. package/package.json +8 -8
  21. package/src/replication/ChangeStream.ts +84 -31
  22. package/src/replication/ChangeStreamReplicationJob.ts +1 -1
  23. package/src/replication/ChangeStreamReplicator.ts +0 -21
  24. package/src/replication/MongoSnapshotQuery.ts +7 -3
  25. package/src/replication/internal-mongodb-utils.ts +91 -0
  26. package/src/replication/replication-index.ts +1 -0
  27. package/test/src/change_stream.test.ts +57 -1
  28. package/test/src/internal_mongodb_utils.test.ts +103 -0
  29. package/tsconfig.tsbuildinfo +1 -1
@@ -12,6 +12,7 @@ import {
12
12
  import {
13
13
  MetricsEngine,
14
14
  RelationCache,
15
+ ReplicationLagTracker,
15
16
  SaveOperationTag,
16
17
  SourceEntityDescriptor,
17
18
  SourceTable,
@@ -19,15 +20,16 @@ import {
19
20
  } from '@powersync/service-core';
20
21
  import {
21
22
  DatabaseInputRow,
23
+ HydratedSyncRules,
22
24
  SqliteInputRow,
23
25
  SqliteRow,
24
- HydratedSyncRules,
25
26
  TablePattern
26
27
  } from '@powersync/service-sync-rules';
27
28
  import { ReplicationMetric } from '@powersync/service-types';
28
29
  import { MongoLSN } from '../common/MongoLSN.js';
29
30
  import { PostImagesOption } from '../types/types.js';
30
31
  import { escapeRegExp } from '../utils.js';
32
+ import { trackChangeStreamBsonBytes } from './internal-mongodb-utils.js';
31
33
  import { MongoManager } from './MongoManager.js';
32
34
  import {
33
35
  constructAfterRecord,
@@ -98,16 +100,7 @@ export class ChangeStream {
98
100
 
99
101
  private relationCache = new RelationCache(getCacheIdentifier);
100
102
 
101
- /**
102
- * Time of the oldest uncommitted change, according to the source db.
103
- * This is used to determine the replication lag.
104
- */
105
- private oldestUncommittedChange: Date | null = null;
106
- /**
107
- * Keep track of whether we have done a commit or keepalive yet.
108
- * We can only compute replication lag if isStartingReplication == false, or oldestUncommittedChange is present.
109
- */
110
- private isStartingReplication = true;
103
+ private replicationLag = new ReplicationLagTracker();
111
104
 
112
105
  private checkpointStreamId = new mongo.ObjectId();
113
106
 
@@ -479,6 +472,10 @@ export class ChangeStream {
479
472
  }
480
473
 
481
474
  private async snapshotTable(batch: storage.BucketStorageBatch, table: storage.SourceTable) {
475
+ const rowsReplicatedMetric = this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED);
476
+ const bytesReplicatedMetric = this.metrics.getCounter(ReplicationMetric.DATA_REPLICATED_BYTES);
477
+ const chunksReplicatedMetric = this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED);
478
+
482
479
  const totalEstimatedCount = await this.estimatedCountNumber(table);
483
480
  let at = table.snapshotStatus?.replicatedCount ?? 0;
484
481
  const db = this.client.db(table.schema);
@@ -499,11 +496,13 @@ export class ChangeStream {
499
496
  let lastBatch = performance.now();
500
497
  let nextChunkPromise = query.nextChunk();
501
498
  while (true) {
502
- const { docs: docBatch, lastKey } = await nextChunkPromise;
499
+ const { docs: docBatch, lastKey, bytes: chunkBytes } = await nextChunkPromise;
503
500
  if (docBatch.length == 0) {
504
501
  // No more data - stop iterating
505
502
  break;
506
503
  }
504
+ bytesReplicatedMetric.add(chunkBytes);
505
+ chunksReplicatedMetric.add(1);
507
506
 
508
507
  if (this.abort_signal.aborted) {
509
508
  throw new ReplicationAbortedError(`Aborted initial replication`, this.abort_signal.reason);
@@ -528,7 +527,7 @@ export class ChangeStream {
528
527
  // Important: flush before marking progress
529
528
  await batch.flush();
530
529
  at += docBatch.length;
531
- this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(docBatch.length);
530
+ rowsReplicatedMetric.add(docBatch.length);
532
531
 
533
532
  table = await batch.updateTableProgress(table, {
534
533
  lastKey,
@@ -819,7 +818,30 @@ export class ChangeStream {
819
818
  };
820
819
  }
821
820
 
821
+ private getBufferedChangeCount(stream: mongo.ChangeStream<mongo.Document>): number {
822
+ // The driver keeps fetched change stream documents on the underlying cursor, but does
823
+ // not expose that through the public ChangeStream API. We use this to detect backlog
824
+ // building up before we have processed the corresponding source changes locally.
825
+ // If the driver API changes, we'll have a hard error here.
826
+ // We specifically want to avoid a silent performance regression if the driver behavior changes.
827
+ const cursor = (
828
+ stream as mongo.ChangeStream<mongo.Document> & {
829
+ cursor: mongo.AbstractCursor<mongo.ChangeStreamDocument<mongo.Document>>;
830
+ }
831
+ ).cursor;
832
+ if (cursor == null || typeof cursor.bufferedCount != 'function') {
833
+ throw new ReplicationAssertionError(
834
+ 'MongoDB ChangeStream no longer exposes an internal cursor with bufferedCount'
835
+ );
836
+ }
837
+ return cursor.bufferedCount();
838
+ }
839
+
822
840
  async streamChangesInternal() {
841
+ const transactionsReplicatedMetric = this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED);
842
+ const bytesReplicatedMetric = this.metrics.getCounter(ReplicationMetric.DATA_REPLICATED_BYTES);
843
+ const chunksReplicatedMetric = this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED);
844
+
823
845
  await this.storage.startBatch(
824
846
  {
825
847
  logger: this.logger,
@@ -848,6 +870,11 @@ export class ChangeStream {
848
870
  await stream.close();
849
871
  return;
850
872
  }
873
+ trackChangeStreamBsonBytes(stream, (bytes) => {
874
+ bytesReplicatedMetric.add(bytes);
875
+ // Each of these represents a single response message from MongoDB.
876
+ chunksReplicatedMetric.add(1);
877
+ });
851
878
 
852
879
  // Always start with a checkpoint.
853
880
  // This helps us to clear errors when restarting, even if there is
@@ -864,6 +891,7 @@ export class ChangeStream {
864
891
  let changesSinceLastCheckpoint = 0;
865
892
 
866
893
  let lastEmptyResume = performance.now();
894
+ let lastTxnKey: string | null = null;
867
895
 
868
896
  while (true) {
869
897
  if (this.abort_signal.aborted) {
@@ -903,7 +931,7 @@ export class ChangeStream {
903
931
  this.logger.info(
904
932
  `Idle change stream. Persisted resumeToken for ${timestampToDate(timestamp).toISOString()}`
905
933
  );
906
- this.isStartingReplication = false;
934
+ this.replicationLag.markStarted();
907
935
  }
908
936
  continue;
909
937
  }
@@ -1005,7 +1033,20 @@ export class ChangeStream {
1005
1033
  // However, these typically have a much lower rate than batch checkpoints, so we don't do that for now.
1006
1034
 
1007
1035
  const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId;
1008
- if (!(checkpointId == STANDALONE_CHECKPOINT_ID || this.checkpointStreamId.equals(checkpointId))) {
1036
+
1037
+ if (checkpointId == STANDALONE_CHECKPOINT_ID) {
1038
+ // Standalone / write checkpoint received.
1039
+ // When we are caught up, commit immediately to keep write checkpoint latency low.
1040
+ // Once there is already a batch checkpoint pending, or the driver has buffered more
1041
+ // change stream events, collapse standalone checkpoints into the normal batch
1042
+ // checkpoint flow to avoid commit churn under sustained load.
1043
+ if (waitForCheckpointLsn != null || this.getBufferedChangeCount(stream) > 0) {
1044
+ if (waitForCheckpointLsn == null) {
1045
+ waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
1046
+ }
1047
+ continue;
1048
+ }
1049
+ } else if (!this.checkpointStreamId.equals(checkpointId)) {
1009
1050
  continue;
1010
1051
  }
1011
1052
  const { comparable: lsn } = new MongoLSN({
@@ -1027,12 +1068,11 @@ export class ChangeStream {
1027
1068
  waitForCheckpointLsn = null;
1028
1069
  }
1029
1070
  const { checkpointBlocked } = await batch.commit(lsn, {
1030
- oldestUncommittedChange: this.oldestUncommittedChange
1071
+ oldestUncommittedChange: this.replicationLag.oldestUncommittedChange
1031
1072
  });
1032
1073
 
1033
1074
  if (!checkpointBlocked) {
1034
- this.oldestUncommittedChange = null;
1035
- this.isStartingReplication = false;
1075
+ this.replicationLag.markCommitted();
1036
1076
  changesSinceLastCheckpoint = 0;
1037
1077
  }
1038
1078
  } else if (
@@ -1044,6 +1084,7 @@ export class ChangeStream {
1044
1084
  if (waitForCheckpointLsn == null) {
1045
1085
  waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
1046
1086
  }
1087
+
1047
1088
  const rel = getMongoRelation(changeDocument.ns);
1048
1089
  const table = await this.getRelation(batch, rel, {
1049
1090
  // In most cases, we should not need to snapshot this. But if this is the first time we see the collection
@@ -1053,9 +1094,20 @@ export class ChangeStream {
1053
1094
  snapshot: true
1054
1095
  });
1055
1096
  if (table.syncAny) {
1056
- if (this.oldestUncommittedChange == null && changeDocument.clusterTime != null) {
1057
- this.oldestUncommittedChange = timestampToDate(changeDocument.clusterTime);
1097
+ this.replicationLag.trackUncommittedChange(
1098
+ changeDocument.clusterTime == null ? null : timestampToDate(changeDocument.clusterTime)
1099
+ );
1100
+
1101
+ const transactionKeyValue = transactionKey(changeDocument);
1102
+
1103
+ if (transactionKeyValue == null || lastTxnKey != transactionKeyValue) {
1104
+ // Very crude metric for counting transactions replicated.
1105
+ // We ignore operations other than basic CRUD, and ignore changes to _powersync_checkpoints.
1106
+ // Individual writes may not have a txnNumber, in which case we count them as separate transactions.
1107
+ lastTxnKey = transactionKeyValue;
1108
+ transactionsReplicatedMetric.add(1);
1058
1109
  }
1110
+
1059
1111
  const flushResult = await this.writeChange(batch, table, changeDocument);
1060
1112
  changesSinceLastCheckpoint += 1;
1061
1113
  if (flushResult != null && changesSinceLastCheckpoint >= 20_000) {
@@ -1106,17 +1158,8 @@ export class ChangeStream {
1106
1158
  );
1107
1159
  }
1108
1160
 
1109
- async getReplicationLagMillis(): Promise<number | undefined> {
1110
- if (this.oldestUncommittedChange == null) {
1111
- if (this.isStartingReplication) {
1112
- // We don't have anything to compute replication lag with yet.
1113
- return undefined;
1114
- } else {
1115
- // We don't have any uncommitted changes, so replication is up-to-date.
1116
- return 0;
1117
- }
1118
- }
1119
- return Date.now() - this.oldestUncommittedChange.getTime();
1161
+ getReplicationLagMillis(): number | undefined {
1162
+ return this.replicationLag.getLagMillis();
1120
1163
  }
1121
1164
 
1122
1165
  private lastTouchedAt = performance.now();
@@ -1153,3 +1196,13 @@ function mapChangeStreamError(e: any) {
1153
1196
  throw new DatabaseConnectionError(ErrorCode.PSYNC_S1346, `Error reading MongoDB ChangeStream`, e);
1154
1197
  }
1155
1198
  }
1199
+
1200
+ /**
1201
+ * Transaction key for a change stream event, used to detect transaction boundaries. Returns null if the event is not part of a transaction.
1202
+ */
1203
+ function transactionKey(doc: mongo.ChangeStreamDocument): string | null {
1204
+ if (doc.txnNumber == null || doc.lsid == null) {
1205
+ return null;
1206
+ }
1207
+ return `${doc.lsid.id.toString('hex')}:${doc.txnNumber}`;
1208
+ }
@@ -80,7 +80,7 @@ export class ChangeStreamReplicationJob extends replication.AbstractReplicationJ
80
80
  }
81
81
  }
82
82
 
83
- async getReplicationLagMillis(): Promise<number | undefined> {
83
+ getReplicationLagMillis(): number | undefined {
84
84
  return this.lastStream?.getReplicationLagMillis();
85
85
  }
86
86
  }
@@ -41,25 +41,4 @@ export class ChangeStreamReplicator extends replication.AbstractReplicator<Chang
41
41
  async testConnection() {
42
42
  return await MongoModule.testConnection(this.connectionFactory.dbConnectionConfig);
43
43
  }
44
-
45
- async getReplicationLagMillis(): Promise<number | undefined> {
46
- const lag = await super.getReplicationLagMillis();
47
- if (lag != null) {
48
- return lag;
49
- }
50
-
51
- // Booting or in an error loop. Check last active replication status.
52
- // This includes sync rules in an ERROR state.
53
- const content = await this.storage.getActiveSyncRulesContent();
54
- if (content == null) {
55
- return undefined;
56
- }
57
- // Measure the lag from the last resume token's time
58
- const lsn = content.last_checkpoint_lsn;
59
- if (lsn == null) {
60
- return undefined;
61
- }
62
- const { timestamp } = MongoLSN.fromSerialized(lsn);
63
- return Date.now() - timestampToDate(timestamp).getTime();
64
- }
65
44
  }
@@ -1,6 +1,7 @@
1
1
  import { mongo } from '@powersync/lib-service-mongodb';
2
2
  import { ReplicationAssertionError } from '@powersync/lib-services-framework';
3
3
  import { bson } from '@powersync/service-core';
4
+ import { getCursorBatchBytes } from './internal-mongodb-utils.js';
4
5
 
5
6
  /**
6
7
  * Performs a collection snapshot query, chunking by ranges of _id.
@@ -21,7 +22,9 @@ export class ChunkedSnapshotQuery implements AsyncDisposable {
21
22
  this.batchSize = options.batchSize;
22
23
  }
23
24
 
24
- async nextChunk(): Promise<{ docs: mongo.Document[]; lastKey: Uint8Array } | { docs: []; lastKey: null }> {
25
+ async nextChunk(): Promise<
26
+ { docs: mongo.Document[]; lastKey: Uint8Array; bytes: number } | { docs: []; lastKey: null; bytes: 0 }
27
+ > {
25
28
  let cursor = this.lastCursor;
26
29
  let newCursor = false;
27
30
  if (cursor == null || cursor.closed) {
@@ -52,12 +55,13 @@ export class ChunkedSnapshotQuery implements AsyncDisposable {
52
55
  this.lastCursor = null;
53
56
  if (newCursor) {
54
57
  // We just created a new cursor and it has no results - we have finished the end of the query.
55
- return { docs: [], lastKey: null };
58
+ return { docs: [], lastKey: null, bytes: 0 };
56
59
  } else {
57
60
  // The cursor may have hit the batch limit - retry
58
61
  return this.nextChunk();
59
62
  }
60
63
  }
64
+ const bytes = getCursorBatchBytes(cursor);
61
65
  const docBatch = cursor.readBufferedDocuments();
62
66
  this.lastCursor = cursor;
63
67
  if (docBatch.length == 0) {
@@ -65,7 +69,7 @@ export class ChunkedSnapshotQuery implements AsyncDisposable {
65
69
  }
66
70
  const lastKey = docBatch[docBatch.length - 1]._id;
67
71
  this.lastKey = lastKey;
68
- return { docs: docBatch, lastKey: bson.serialize({ _id: lastKey }) };
72
+ return { docs: docBatch, lastKey: bson.serialize({ _id: lastKey }), bytes };
69
73
  }
70
74
 
71
75
  async [Symbol.asyncDispose](): Promise<void> {
@@ -0,0 +1,91 @@
1
+ import { mongo } from '@powersync/lib-service-mongodb';
2
+ import { get } from 'http';
3
+
4
+ /**
5
+ * Track bytes read on a change stream.
6
+ *
7
+ * This is after decompression, and without TLS overhead.
8
+ *
9
+ * This excludes some protocol overhead, but does include per-batch overhead.
10
+ *
11
+ * This is built on internal APIs, and may stop working in future driver versions.
12
+ *
13
+ * @param add Called once for each batch of data.
14
+ */
15
+ export function trackChangeStreamBsonBytes(changeStream: mongo.ChangeStream, add: (bytes: number) => void) {
16
+ let internalChangeStream = changeStream as ChangeStreamWithCursor;
17
+ let current = internalChangeStream.cursor;
18
+ let degisterCursor = trackCursor(current, add);
19
+
20
+ const refresh = () => {
21
+ // The cursor may be closed and re-opened (replaced) in various scenarios, such as
22
+ // after a primary fail-over event.
23
+ // There is no direct event to track that, but the `resumeTokenChanged` event is a good proxy.
24
+ // It may be called more often than the cursor is replaced, so we just check whether the cursor changed.
25
+ // This might miss the init batch, so we may under-count slightly in that case. It is a rare event
26
+ // and typically a small number of bytes, so it's fine to ignore.
27
+ const next = internalChangeStream.cursor;
28
+ if (next !== current) {
29
+ degisterCursor();
30
+ current = next;
31
+ degisterCursor = trackCursor(current, add);
32
+ }
33
+ };
34
+
35
+ changeStream.on('resumeTokenChanged', refresh);
36
+
37
+ // We return this to allow de-registration of the event listeners.
38
+ // However, these are garbage collected automatically when the stream is closed, so it's not strictly necessary to call this.
39
+ return () => {
40
+ changeStream.off('resumeTokenChanged', refresh);
41
+ };
42
+ }
43
+
44
+ /**
45
+ * Get the byte size of the current batch on a cursor.
46
+ *
47
+ * Call after hasNext(), before or after readBufferedDocuments().
48
+ *
49
+ * This is built on internal APIs, and may stop working in future driver versions.
50
+ */
51
+ export function getCursorBatchBytes(cursor: mongo.AbstractCursor): number {
52
+ const documents = (cursor as any).documents as CursorResponse | undefined;
53
+ return getResponseBytes(documents);
54
+ }
55
+
56
+ // Define the internal types from the driver.
57
+ // Here we're using them defensively, assuming it may be undefined at any point.
58
+
59
+ interface CursorResponse {
60
+ toBytes?(): Uint8Array;
61
+ }
62
+
63
+ interface ChangeStreamWithCursor extends mongo.ChangeStream {
64
+ cursor?: mongo.AbstractCursor;
65
+ }
66
+
67
+ function trackCursor(cursor: mongo.AbstractCursor | undefined, add: (bytes: number) => void) {
68
+ if (cursor == null) {
69
+ return () => {};
70
+ }
71
+ const countBatch = (response: CursorResponse | undefined) => {
72
+ const bytes = getResponseBytes(response);
73
+ if (bytes > 0) {
74
+ add(bytes);
75
+ }
76
+ };
77
+
78
+ // The `init` event is emitted for the first batch, and the `more` event is emitted for subsequent batches.
79
+ cursor.on('init', countBatch);
80
+ cursor.on('more', countBatch);
81
+
82
+ return () => {
83
+ cursor.off('init', countBatch);
84
+ cursor.off('more', countBatch);
85
+ };
86
+ }
87
+
88
+ function getResponseBytes(response: CursorResponse | undefined): number {
89
+ const buffer = response?.toBytes?.();
90
+ return buffer?.byteLength ?? 0;
91
+ }
@@ -2,3 +2,4 @@ export * from './MongoRelation.js';
2
2
  export * from './ChangeStream.js';
3
3
  export * from './ChangeStreamReplicator.js';
4
4
  export * from './ChangeStreamReplicationJob.js';
5
+ export * from './internal-mongodb-utils.js';
@@ -3,11 +3,14 @@ import { setTimeout } from 'node:timers/promises';
3
3
  import { describe, expect, test, vi } from 'vitest';
4
4
 
5
5
  import { mongo } from '@powersync/lib-service-mongodb';
6
+ import { createWriteCheckpoint } from '@powersync/service-core';
6
7
  import { test_utils } from '@powersync/service-core-tests';
7
8
 
9
+ import { MongoRouteAPIAdapter } from '@module/api/MongoRouteAPIAdapter.js';
8
10
  import { PostImagesOption } from '@module/types/types.js';
9
11
  import { ChangeStreamTestContext } from './change_stream_utils.js';
10
- import { describeWithStorage, StorageVersionTestContext } from './util.js';
12
+ import { describeWithStorage, StorageVersionTestContext, TEST_CONNECTION_OPTIONS } from './util.js';
13
+ import { createCheckpoint, STANDALONE_CHECKPOINT_ID } from '@module/replication/MongoRelation.js';
11
14
 
12
15
  const BASIC_SYNC_RULES = `
13
16
  bucket_definitions:
@@ -368,6 +371,59 @@ bucket_definitions:
368
371
  expect(data).toMatchObject([test_utils.putOp('test_data', { id: test_id, description: 'test1' })]);
369
372
  });
370
373
 
374
+ test('coalesces standalone checkpoints when backlog is buffered', async () => {
375
+ await using context = await openContext();
376
+ await context.updateSyncRules(BASIC_SYNC_RULES);
377
+ await context.replicateSnapshot();
378
+ await context.markSnapshotConsistent();
379
+ await using api = new MongoRouteAPIAdapter({
380
+ type: 'mongodb',
381
+ ...TEST_CONNECTION_OPTIONS
382
+ });
383
+
384
+ let commitCount = 0;
385
+ // This relies on internals to count how often checkpoints are committed
386
+ context.storage!.registerListener({
387
+ batchStarted: (batch) => {
388
+ const originalCommit = batch.commit.bind(batch);
389
+ batch.commit = async (...args) => {
390
+ commitCount += 1;
391
+ return await originalCommit(...args);
392
+ };
393
+ }
394
+ });
395
+
396
+ context.startStreaming();
397
+
398
+ // Wait until the stream is active and caught up, then start counting from zero.
399
+ await context.getCheckpoint();
400
+ commitCount = 0;
401
+
402
+ // Create a large number of write checkpoints together.
403
+ // We could alternatively use createCheckpoint() directly, but this gives more of
404
+ // an end-to-end test of the checkpointing behavior under load.
405
+ const checkpointCount = 30;
406
+ await Promise.all(
407
+ Array.from({ length: checkpointCount }, (i) =>
408
+ createWriteCheckpoint({
409
+ userId: 'test_user',
410
+ clientId: 'test_client' + i,
411
+ api,
412
+ storage: context.factory
413
+ })
414
+ )
415
+ );
416
+
417
+ // Wait for the checkpoints to be processed.
418
+ await context.getCheckpoint();
419
+
420
+ // We need at least 1 commit.
421
+ expect(commitCount).toBeGreaterThan(0);
422
+ // The previous implementation created 1 commit per checkpoint, which is bad for performance.
423
+ // We expect a small number here - typically 2-10, but allow for anything up to the total number of checkpoints.
424
+ expect(commitCount).toBeLessThan(checkpointCount + 1);
425
+ });
426
+
371
427
  test('large record', async () => {
372
428
  // Test a large update.
373
429
 
@@ -0,0 +1,103 @@
1
+ import { describe, expect, test } from 'vitest';
2
+
3
+ import { getCursorBatchBytes, trackChangeStreamBsonBytes } from '@module/replication/replication-index.js';
4
+ import { mongo } from '@powersync/lib-service-mongodb';
5
+ import { clearTestDb, connectMongoData } from './util.js';
6
+
7
+ describe('internal mongodb utils', () => {
8
+ // The implementation relies on internal APIs, so we verify this works as expected for various types of change streams.
9
+ test('collection change stream size tracking', async () => {
10
+ await testChangeStreamBsonBytes('collection');
11
+ });
12
+
13
+ test('db change stream size tracking', async () => {
14
+ await testChangeStreamBsonBytes('db');
15
+ });
16
+
17
+ test('cluster change stream size tracking', async () => {
18
+ await testChangeStreamBsonBytes('cluster');
19
+ });
20
+
21
+ test('cursor batch size tracking', async () => {
22
+ const { db, client } = await connectMongoData();
23
+ await using _ = { [Symbol.asyncDispose]: async () => await client.close() };
24
+ await clearTestDb(db);
25
+ const collection = db.collection('test_data');
26
+ await collection.insertMany([{ test: 1 }, { test: 2 }, { test: 3 }, { test: 4 }, { test: 5 }]);
27
+
28
+ const cursor = collection.find({}, { batchSize: 2 });
29
+ let batchBytes: number[] = [];
30
+ let totalBytes = 0;
31
+ // We use this in the same way as ChunkedSnapshotQuery
32
+ while (await cursor.hasNext()) {
33
+ batchBytes.push(getCursorBatchBytes(cursor));
34
+ totalBytes += batchBytes[batchBytes.length - 1];
35
+ cursor.readBufferedDocuments();
36
+ }
37
+
38
+ // 3 batches: [2, 2, 1] documents. Should not change
39
+ expect(batchBytes.length).toEqual(3);
40
+ // Current tests show 839, but this may change depending on the MongoDB version and other conditions.
41
+ expect(totalBytes).toBeGreaterThan(400);
42
+ expect(totalBytes).toBeLessThan(1200);
43
+ });
44
+
45
+ async function testChangeStreamBsonBytes(type: 'db' | 'collection' | 'cluster') {
46
+ // With MongoDB, replication uses the exact same document format
47
+ // as normal queries. We test it anyway.
48
+ const { db, client } = await connectMongoData();
49
+ await using _ = { [Symbol.asyncDispose]: async () => await client.close() };
50
+ await clearTestDb(db);
51
+ const collection = db.collection('test_data');
52
+
53
+ let stream: mongo.ChangeStream;
54
+ if (type === 'collection') {
55
+ stream = collection.watch([], {
56
+ maxAwaitTimeMS: 5,
57
+ fullDocument: 'updateLookup'
58
+ });
59
+ } else if (type === 'db') {
60
+ stream = db.watch([], {
61
+ maxAwaitTimeMS: 5,
62
+ fullDocument: 'updateLookup'
63
+ });
64
+ } else {
65
+ stream = client.watch([], {
66
+ maxAwaitTimeMS: 5,
67
+ fullDocument: 'updateLookup'
68
+ });
69
+ }
70
+
71
+ let batchBytes: number[] = [];
72
+ let totalBytes = 0;
73
+ trackChangeStreamBsonBytes(stream, (bytes) => {
74
+ batchBytes.push(bytes);
75
+ totalBytes += bytes;
76
+ });
77
+
78
+ const readAll = async () => {
79
+ while ((await stream.tryNext()) != null) {}
80
+ };
81
+
82
+ await readAll();
83
+
84
+ await collection.insertOne({ test: 1 });
85
+ await readAll();
86
+ await collection.insertOne({ test: 2 });
87
+ await readAll();
88
+ await collection.insertOne({ test: 3 });
89
+ await readAll();
90
+
91
+ await stream.close();
92
+
93
+ // The exact length may vary based on exact batching logic, but we do want to know when it changes.
94
+ // Note: If this causes unstable tests, we can relax this check.
95
+ expect(batchBytes.length).toEqual(8);
96
+
97
+ // Current tests show 4464-4576 bytes for the size, depending on the type of change stream.
98
+ // This can easily vary based on the mongodb version and general conditions, so we just check the general range.
99
+ // For the most part, if any bytes are reported, the tracking is working.
100
+ expect(totalBytes).toBeGreaterThan(2000);
101
+ expect(totalBytes).toBeLessThan(8000);
102
+ }
103
+ });