@powersync/service-module-mongodb 0.15.1 → 0.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -38,6 +38,7 @@ import {
38
38
  } from './MongoRelation.js';
39
39
  import { ChunkedSnapshotQuery } from './MongoSnapshotQuery.js';
40
40
  import { CHECKPOINTS_COLLECTION, timestampToDate } from './replication-utils.js';
41
+ import { trackChangeStreamBsonBytes } from './internal-mongodb-utils.js';
41
42
 
42
43
  export interface ChangeStreamOptions {
43
44
  connections: MongoManager;
@@ -479,6 +480,10 @@ export class ChangeStream {
479
480
  }
480
481
 
481
482
  private async snapshotTable(batch: storage.BucketStorageBatch, table: storage.SourceTable) {
483
+ const rowsReplicatedMetric = this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED);
484
+ const bytesReplicatedMetric = this.metrics.getCounter(ReplicationMetric.DATA_REPLICATED_BYTES);
485
+ const chunksReplicatedMetric = this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED);
486
+
482
487
  const totalEstimatedCount = await this.estimatedCountNumber(table);
483
488
  let at = table.snapshotStatus?.replicatedCount ?? 0;
484
489
  const db = this.client.db(table.schema);
@@ -499,11 +504,13 @@ export class ChangeStream {
499
504
  let lastBatch = performance.now();
500
505
  let nextChunkPromise = query.nextChunk();
501
506
  while (true) {
502
- const { docs: docBatch, lastKey } = await nextChunkPromise;
507
+ const { docs: docBatch, lastKey, bytes: chunkBytes } = await nextChunkPromise;
503
508
  if (docBatch.length == 0) {
504
509
  // No more data - stop iterating
505
510
  break;
506
511
  }
512
+ bytesReplicatedMetric.add(chunkBytes);
513
+ chunksReplicatedMetric.add(1);
507
514
 
508
515
  if (this.abort_signal.aborted) {
509
516
  throw new ReplicationAbortedError(`Aborted initial replication`, this.abort_signal.reason);
@@ -528,7 +535,7 @@ export class ChangeStream {
528
535
  // Important: flush before marking progress
529
536
  await batch.flush();
530
537
  at += docBatch.length;
531
- this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(docBatch.length);
538
+ rowsReplicatedMetric.add(docBatch.length);
532
539
 
533
540
  table = await batch.updateTableProgress(table, {
534
541
  lastKey,
@@ -819,7 +826,30 @@ export class ChangeStream {
819
826
  };
820
827
  }
821
828
 
829
/**
 * Reports how many change stream documents the driver has already fetched but that
 * have not yet been read from the stream.
 *
 * The driver keeps fetched documents on the change stream's underlying cursor, which
 * the public ChangeStream API does not expose, so the cursor is reached through a cast.
 * The result is used to detect a backlog building up ahead of local processing.
 *
 * @param stream The change stream to inspect.
 * @returns The value returned by the internal cursor's `bufferedCount()`.
 * @throws ReplicationAssertionError when the internal cursor, or its `bufferedCount`
 *   method, is missing. Failing loudly is deliberate: a change in driver internals
 *   must not turn into a silent performance regression.
 */
private getBufferedChangeCount(stream: mongo.ChangeStream<mongo.Document>): number {
  type StreamWithCursor = mongo.ChangeStream<mongo.Document> & {
    cursor: mongo.AbstractCursor<mongo.ChangeStreamDocument<mongo.Document>>;
  };

  const internalCursor = (stream as StreamWithCursor).cursor;
  if (typeof internalCursor?.bufferedCount == 'function') {
    return internalCursor.bufferedCount();
  }

  throw new ReplicationAssertionError(
    'MongoDB ChangeStream no longer exposes an internal cursor with bufferedCount'
  );
}
847
+
822
848
  async streamChangesInternal() {
849
+ const transactionsReplicatedMetric = this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED);
850
+ const bytesReplicatedMetric = this.metrics.getCounter(ReplicationMetric.DATA_REPLICATED_BYTES);
851
+ const chunksReplicatedMetric = this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED);
852
+
823
853
  await this.storage.startBatch(
824
854
  {
825
855
  logger: this.logger,
@@ -848,6 +878,11 @@ export class ChangeStream {
848
878
  await stream.close();
849
879
  return;
850
880
  }
881
+ trackChangeStreamBsonBytes(stream, (bytes) => {
882
+ bytesReplicatedMetric.add(bytes);
883
+ // Each of these represents a single response message from MongoDB.
884
+ chunksReplicatedMetric.add(1);
885
+ });
851
886
 
852
887
  // Always start with a checkpoint.
853
888
  // This helps us to clear errors when restarting, even if there is
@@ -864,6 +899,7 @@ export class ChangeStream {
864
899
  let changesSinceLastCheckpoint = 0;
865
900
 
866
901
  let lastEmptyResume = performance.now();
902
+ let lastTxnKey: string | null = null;
867
903
 
868
904
  while (true) {
869
905
  if (this.abort_signal.aborted) {
@@ -1005,7 +1041,20 @@ export class ChangeStream {
1005
1041
  // However, these typically have a much lower rate than batch checkpoints, so we don't do that for now.
1006
1042
 
1007
1043
  const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId;
1008
- if (!(checkpointId == STANDALONE_CHECKPOINT_ID || this.checkpointStreamId.equals(checkpointId))) {
1044
+
1045
+ if (checkpointId == STANDALONE_CHECKPOINT_ID) {
1046
+ // Standalone / write checkpoint received.
1047
+ // When we are caught up, commit immediately to keep write checkpoint latency low.
1048
+ // Once there is already a batch checkpoint pending, or the driver has buffered more
1049
+ // change stream events, collapse standalone checkpoints into the normal batch
1050
+ // checkpoint flow to avoid commit churn under sustained load.
1051
+ if (waitForCheckpointLsn != null || this.getBufferedChangeCount(stream) > 0) {
1052
+ if (waitForCheckpointLsn == null) {
1053
+ waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
1054
+ }
1055
+ continue;
1056
+ }
1057
+ } else if (!this.checkpointStreamId.equals(checkpointId)) {
1009
1058
  continue;
1010
1059
  }
1011
1060
  const { comparable: lsn } = new MongoLSN({
@@ -1044,6 +1093,7 @@ export class ChangeStream {
1044
1093
  if (waitForCheckpointLsn == null) {
1045
1094
  waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
1046
1095
  }
1096
+
1047
1097
  const rel = getMongoRelation(changeDocument.ns);
1048
1098
  const table = await this.getRelation(batch, rel, {
1049
1099
  // In most cases, we should not need to snapshot this. But if this is the first time we see the collection
@@ -1056,6 +1106,17 @@ export class ChangeStream {
1056
1106
  if (this.oldestUncommittedChange == null && changeDocument.clusterTime != null) {
1057
1107
  this.oldestUncommittedChange = timestampToDate(changeDocument.clusterTime);
1058
1108
  }
1109
+
1110
+ const transactionKeyValue = transactionKey(changeDocument);
1111
+
1112
+ if (transactionKeyValue == null || lastTxnKey != transactionKeyValue) {
1113
+ // Very crude metric for counting transactions replicated.
1114
+ // We ignore operations other than basic CRUD, and ignore changes to _powersync_checkpoints.
1115
+ // Individual writes may not have a txnNumber, in which case we count them as separate transactions.
1116
+ lastTxnKey = transactionKeyValue;
1117
+ transactionsReplicatedMetric.add(1);
1118
+ }
1119
+
1059
1120
  const flushResult = await this.writeChange(batch, table, changeDocument);
1060
1121
  changesSinceLastCheckpoint += 1;
1061
1122
  if (flushResult != null && changesSinceLastCheckpoint >= 20_000) {
@@ -1153,3 +1214,13 @@ function mapChangeStreamError(e: any) {
1153
1214
  throw new DatabaseConnectionError(ErrorCode.PSYNC_S1346, `Error reading MongoDB ChangeStream`, e);
1154
1215
  }
1155
1216
  }
1217
+
1218
/**
 * Builds a key identifying the transaction a change stream event belongs to, so that
 * consecutive events can be compared to detect transaction boundaries.
 *
 * @param doc The change stream event to inspect.
 * @returns `<lsid.id as hex>:<txnNumber>`, or null when the event has no `txnNumber`
 *   or no `lsid`, i.e. it is not part of a transaction.
 */
function transactionKey(doc: mongo.ChangeStreamDocument): string | null {
  const { lsid: sessionId, txnNumber } = doc;
  if (sessionId != null && txnNumber != null) {
    return `${sessionId.id.toString('hex')}:${txnNumber}`;
  }
  return null;
}
@@ -1,6 +1,7 @@
1
1
  import { mongo } from '@powersync/lib-service-mongodb';
2
2
  import { ReplicationAssertionError } from '@powersync/lib-services-framework';
3
3
  import { bson } from '@powersync/service-core';
4
+ import { getCursorBatchBytes } from './internal-mongodb-utils.js';
4
5
 
5
6
  /**
6
7
  * Performs a collection snapshot query, chunking by ranges of _id.
@@ -21,7 +22,9 @@ export class ChunkedSnapshotQuery implements AsyncDisposable {
21
22
  this.batchSize = options.batchSize;
22
23
  }
23
24
 
24
- async nextChunk(): Promise<{ docs: mongo.Document[]; lastKey: Uint8Array } | { docs: []; lastKey: null }> {
25
+ async nextChunk(): Promise<
26
+ { docs: mongo.Document[]; lastKey: Uint8Array; bytes: number } | { docs: []; lastKey: null; bytes: 0 }
27
+ > {
25
28
  let cursor = this.lastCursor;
26
29
  let newCursor = false;
27
30
  if (cursor == null || cursor.closed) {
@@ -52,12 +55,13 @@ export class ChunkedSnapshotQuery implements AsyncDisposable {
52
55
  this.lastCursor = null;
53
56
  if (newCursor) {
54
57
  // We just created a new cursor and it has no results - we have finished the end of the query.
55
- return { docs: [], lastKey: null };
58
+ return { docs: [], lastKey: null, bytes: 0 };
56
59
  } else {
57
60
  // The cursor may have hit the batch limit - retry
58
61
  return this.nextChunk();
59
62
  }
60
63
  }
64
+ const bytes = getCursorBatchBytes(cursor);
61
65
  const docBatch = cursor.readBufferedDocuments();
62
66
  this.lastCursor = cursor;
63
67
  if (docBatch.length == 0) {
@@ -65,7 +69,7 @@ export class ChunkedSnapshotQuery implements AsyncDisposable {
65
69
  }
66
70
  const lastKey = docBatch[docBatch.length - 1]._id;
67
71
  this.lastKey = lastKey;
68
- return { docs: docBatch, lastKey: bson.serialize({ _id: lastKey }) };
72
+ return { docs: docBatch, lastKey: bson.serialize({ _id: lastKey }), bytes };
69
73
  }
70
74
 
71
75
  async [Symbol.asyncDispose](): Promise<void> {
@@ -0,0 +1,91 @@
1
+ import { mongo } from '@powersync/lib-service-mongodb';
2
+ import { get } from 'http';
3
+
4
/**
 * Track bytes read on a change stream.
 *
 * Sizes are measured after decompression and without TLS overhead. Some protocol
 * overhead is excluded, but per-batch overhead is included.
 *
 * This is built on internal driver APIs, and may stop working in future driver versions.
 *
 * @param changeStream The change stream whose internal cursor is observed.
 * @param add Called once for each batch of data, with the size of that batch in bytes.
 * @returns A function that detaches every listener registered here: the
 *   `resumeTokenChanged` listener on the change stream and the batch listeners on the
 *   cursor currently being tracked.
 */
export function trackChangeStreamBsonBytes(
  changeStream: mongo.ChangeStream,
  add: (bytes: number) => void
): () => void {
  const internalChangeStream = changeStream as ChangeStreamWithCursor;
  let current = internalChangeStream.cursor;
  let deregisterCursor = trackCursor(current, add);

  const refresh = () => {
    // The cursor may be closed and re-opened (replaced) in various scenarios, such as
    // after a primary fail-over event.
    // There is no direct event to track that, but the `resumeTokenChanged` event is a good proxy.
    // It may be called more often than the cursor is replaced, so we just check whether the cursor changed.
    // This might miss the init batch, so we may under-count slightly in that case. It is a rare event
    // and typically a small number of bytes, so it's fine to ignore.
    const next = internalChangeStream.cursor;
    if (next !== current) {
      deregisterCursor();
      current = next;
      deregisterCursor = trackCursor(current, add);
    }
  };

  changeStream.on('resumeTokenChanged', refresh);

  // We return this to allow de-registration of the event listeners.
  // However, these are garbage collected automatically when the stream is closed, so it's not strictly
  // necessary to call this.
  return () => {
    changeStream.off('resumeTokenChanged', refresh);
    // Also detach the batch listeners from the cursor that is currently tracked, so that
    // `add` is no longer invoked once the caller has asked to stop tracking.
    deregisterCursor();
  };
}
43
+
44
/**
 * Reports the byte size of the batch currently held by a cursor.
 *
 * Call after hasNext(); it may be called either before or after readBufferedDocuments().
 *
 * This is built on internal driver APIs, and may stop working in future driver versions.
 *
 * @param cursor The cursor whose current batch is measured.
 * @returns The batch size in bytes, or 0 when the cursor holds no response object or
 *   that object has no `toBytes()` method.
 */
export function getCursorBatchBytes(cursor: mongo.AbstractCursor): number {
  // `documents` is read through an untyped cast because it is a driver internal.
  return getResponseBytes((cursor as any).documents as CursorResponse | undefined);
}
55
+
56
// Shapes of the driver internals used in this file.
// They are declared defensively: any member may turn out to be undefined at runtime.

/** Response object held by a cursor for its current batch. */
interface CursorResponse {
  toBytes?(): Uint8Array;
}

/** A change stream together with its internal cursor. */
interface ChangeStreamWithCursor extends mongo.ChangeStream {
  cursor?: mongo.AbstractCursor;
}

/**
 * Subscribes to the batch events of a cursor and reports the size of every non-empty batch.
 *
 * @param cursor The cursor to observe. When it is null or undefined nothing is registered.
 * @param add Invoked with the batch size in bytes, only for batches larger than 0 bytes.
 * @returns A function that removes the listeners registered by this call (a no-op when
 *   no cursor was given).
 */
function trackCursor(cursor: mongo.AbstractCursor | undefined, add: (bytes: number) => void) {
  if (cursor == null) {
    return () => {};
  }
  const source = cursor;

  // The `init` event is emitted for the first batch, and the `more` event for subsequent batches.
  const batchEvents = ['init', 'more'];

  const onBatch = (response: CursorResponse | undefined) => {
    const size = getResponseBytes(response);
    if (size > 0) {
      add(size);
    }
  };

  for (const eventName of batchEvents) {
    source.on(eventName, onBatch);
  }

  return () => {
    for (const eventName of batchEvents) {
      source.off(eventName, onBatch);
    }
  };
}
87
+
88
/**
 * Size in bytes of a cursor response.
 *
 * @param response The response object, if any.
 * @returns The `byteLength` of `response.toBytes()`, or 0 when the response, its
 *   `toBytes` method, or the returned buffer is missing.
 */
function getResponseBytes(response: CursorResponse | undefined): number {
  if (response == null || response.toBytes == null) {
    return 0;
  }
  const raw = response.toBytes();
  return raw?.byteLength ?? 0;
}
@@ -2,3 +2,4 @@ export * from './MongoRelation.js';
2
2
  export * from './ChangeStream.js';
3
3
  export * from './ChangeStreamReplicator.js';
4
4
  export * from './ChangeStreamReplicationJob.js';
5
+ export * from './internal-mongodb-utils.js';
@@ -3,11 +3,14 @@ import { setTimeout } from 'node:timers/promises';
3
3
  import { describe, expect, test, vi } from 'vitest';
4
4
 
5
5
  import { mongo } from '@powersync/lib-service-mongodb';
6
+ import { createWriteCheckpoint } from '@powersync/service-core';
6
7
  import { test_utils } from '@powersync/service-core-tests';
7
8
 
9
+ import { MongoRouteAPIAdapter } from '@module/api/MongoRouteAPIAdapter.js';
8
10
  import { PostImagesOption } from '@module/types/types.js';
9
11
  import { ChangeStreamTestContext } from './change_stream_utils.js';
10
- import { describeWithStorage, StorageVersionTestContext } from './util.js';
12
+ import { describeWithStorage, StorageVersionTestContext, TEST_CONNECTION_OPTIONS } from './util.js';
13
+ import { createCheckpoint, STANDALONE_CHECKPOINT_ID } from '@module/replication/MongoRelation.js';
11
14
 
12
15
  const BASIC_SYNC_RULES = `
13
16
  bucket_definitions:
@@ -368,6 +371,59 @@ bucket_definitions:
368
371
  expect(data).toMatchObject([test_utils.putOp('test_data', { id: test_id, description: 'test1' })]);
369
372
  });
370
373
 
374
// Verifies that a burst of write checkpoints is collapsed into a small number of commits,
// rather than producing one commit per checkpoint.
test('coalesces standalone checkpoints when backlog is buffered', async () => {
  await using context = await openContext();
  await context.updateSyncRules(BASIC_SYNC_RULES);
  await context.replicateSnapshot();
  await context.markSnapshotConsistent();
  await using api = new MongoRouteAPIAdapter({
    type: 'mongodb',
    ...TEST_CONNECTION_OPTIONS
  });

  let commitCount = 0;
  // This relies on internals to count how often checkpoints are committed:
  // every batch that is started gets its commit() wrapped with a counter.
  context.storage!.registerListener({
    batchStarted: (batch) => {
      const originalCommit = batch.commit.bind(batch);
      batch.commit = async (...args) => {
        commitCount += 1;
        return await originalCommit(...args);
      };
    }
  });

  context.startStreaming();

  // Wait until the stream is active and caught up, then start counting from zero.
  await context.getCheckpoint();
  commitCount = 0;

  // Create a large number of write checkpoints together.
  // We could alternatively use createCheckpoint() directly, but this gives more of
  // an end-to-end test of the checkpointing behavior under load.
  const checkpointCount = 30;
  await Promise.all(
    // Array.from passes (value, index) to the mapper; the value is always undefined here,
    // so the index must be taken from the second argument to get distinct client ids.
    Array.from({ length: checkpointCount }, (_, i) =>
      createWriteCheckpoint({
        userId: 'test_user',
        clientId: 'test_client' + i,
        api,
        storage: context.factory
      })
    )
  );

  // Wait for the checkpoints to be processed.
  await context.getCheckpoint();

  // We need at least 1 commit.
  expect(commitCount).toBeGreaterThan(0);
  // The previous implementation created 1 commit per checkpoint, which is bad for performance.
  // We expect a small number here - typically 2-10, but allow for anything less than the total number of checkpoints.
  expect(commitCount).toBeLessThan(checkpointCount + 1);
});
426
+
371
427
  test('large record', async () => {
372
428
  // Test a large update.
373
429
 
@@ -0,0 +1,103 @@
1
+ import { describe, expect, test } from 'vitest';
2
+
3
+ import { getCursorBatchBytes, trackChangeStreamBsonBytes } from '@module/replication/replication-index.js';
4
+ import { mongo } from '@powersync/lib-service-mongodb';
5
+ import { clearTestDb, connectMongoData } from './util.js';
6
+
7
describe('internal mongodb utils', () => {
  // The helpers under test rely on driver internals, so each kind of change stream is
  // exercised to confirm that byte tracking still works for it.
  test('collection change stream size tracking', () => testChangeStreamBsonBytes('collection'));

  test('db change stream size tracking', () => testChangeStreamBsonBytes('db'));

  test('cluster change stream size tracking', () => testChangeStreamBsonBytes('cluster'));

  test('cursor batch size tracking', async () => {
    const { db, client } = await connectMongoData();
    await using _ = { [Symbol.asyncDispose]: async () => await client.close() };
    await clearTestDb(db);
    const collection = db.collection('test_data');
    await collection.insertMany([1, 2, 3, 4, 5].map((test) => ({ test })));

    const cursor = collection.find({}, { batchSize: 2 });
    const batchBytes: number[] = [];
    // Same call order as ChunkedSnapshotQuery: hasNext(), measure, then drain the buffer.
    while (await cursor.hasNext()) {
      batchBytes.push(getCursorBatchBytes(cursor));
      cursor.readBufferedDocuments();
    }
    const totalBytes = batchBytes.reduce((sum, size) => sum + size, 0);

    // 3 batches: [2, 2, 1] documents. Should not change.
    expect(batchBytes.length).toEqual(3);
    // Current tests show 839, but this may change depending on the MongoDB version and other conditions.
    expect(totalBytes).toBeGreaterThan(400);
    expect(totalBytes).toBeLessThan(1200);
  });

  /**
   * Opens a change stream of the given kind, performs three inserts, and checks the
   * number of tracked batches and the total number of tracked bytes.
   */
  async function testChangeStreamBsonBytes(type: 'db' | 'collection' | 'cluster') {
    // With MongoDB, replication uses the exact same document format
    // as normal queries. We test it anyway.
    const { db, client } = await connectMongoData();
    await using _ = { [Symbol.asyncDispose]: async () => await client.close() };
    await clearTestDb(db);
    const collection = db.collection('test_data');

    const watchOptions = { maxAwaitTimeMS: 5, fullDocument: 'updateLookup' } as const;
    let stream: mongo.ChangeStream;
    switch (type) {
      case 'collection':
        stream = collection.watch([], watchOptions);
        break;
      case 'db':
        stream = db.watch([], watchOptions);
        break;
      default:
        stream = client.watch([], watchOptions);
        break;
    }

    const batchBytes: number[] = [];
    let totalBytes = 0;
    trackChangeStreamBsonBytes(stream, (bytes) => {
      batchBytes.push(bytes);
      totalBytes += bytes;
    });

    // Reads events until tryNext() reports that nothing more is available.
    const drain = async () => {
      let event = await stream.tryNext();
      while (event != null) {
        event = await stream.tryNext();
      }
    };

    await drain();
    for (const value of [1, 2, 3]) {
      await collection.insertOne({ test: value });
      await drain();
    }

    await stream.close();

    // The exact length may vary based on exact batching logic, but we do want to know when it changes.
    // Note: If this causes unstable tests, we can relax this check.
    expect(batchBytes.length).toEqual(8);

    // Current tests show 4464-4576 bytes for the size, depending on the type of change stream.
    // This can easily vary based on the mongodb version and general conditions, so we just check the general range.
    // For the most part, if any bytes are reported, the tracking is working.
    expect(totalBytes).toBeGreaterThan(2000);
    expect(totalBytes).toBeLessThan(8000);
  }
});