@powersync/service-module-mongodb 0.15.4 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +67 -0
- package/dist/api/MongoRouteAPIAdapter.js +12 -21
- package/dist/api/MongoRouteAPIAdapter.js.map +1 -1
- package/dist/replication/ChangeStream.d.ts +23 -42
- package/dist/replication/ChangeStream.js +363 -600
- package/dist/replication/ChangeStream.js.map +1 -1
- package/dist/replication/ChangeStreamReplicationJob.js +2 -2
- package/dist/replication/ChangeStreamReplicationJob.js.map +1 -1
- package/dist/replication/JsonBufferWriter.d.ts +80 -0
- package/dist/replication/JsonBufferWriter.js +342 -0
- package/dist/replication/JsonBufferWriter.js.map +1 -0
- package/dist/replication/MongoRelation.d.ts +1 -1
- package/dist/replication/MongoRelation.js +45 -21
- package/dist/replication/MongoRelation.js.map +1 -1
- package/dist/replication/MongoSnapshotQuery.d.ts +1 -1
- package/dist/replication/MongoSnapshotQuery.js +6 -3
- package/dist/replication/MongoSnapshotQuery.js.map +1 -1
- package/dist/replication/MongoSnapshotter.d.ts +81 -0
- package/dist/replication/MongoSnapshotter.js +594 -0
- package/dist/replication/MongoSnapshotter.js.map +1 -0
- package/dist/replication/RawChangeStream.d.ts +55 -0
- package/dist/replication/RawChangeStream.js +322 -0
- package/dist/replication/RawChangeStream.js.map +1 -0
- package/dist/replication/SourceRowConverter.d.ts +46 -0
- package/dist/replication/SourceRowConverter.js +42 -0
- package/dist/replication/SourceRowConverter.js.map +1 -0
- package/dist/replication/bufferToSqlite.d.ts +43 -0
- package/dist/replication/bufferToSqlite.js +740 -0
- package/dist/replication/bufferToSqlite.js.map +1 -0
- package/dist/replication/internal-mongodb-utils.d.ts +0 -12
- package/dist/replication/internal-mongodb-utils.js +0 -54
- package/dist/replication/internal-mongodb-utils.js.map +1 -1
- package/dist/replication/replication-index.d.ts +2 -0
- package/dist/replication/replication-index.js +2 -0
- package/dist/replication/replication-index.js.map +1 -1
- package/package.json +11 -11
- package/scripts/benchmark-change-document-json.mts +358 -0
- package/scripts/benchmark-change-document.mts +370 -0
- package/src/api/MongoRouteAPIAdapter.ts +13 -21
- package/src/replication/ChangeStream.ts +421 -720
- package/src/replication/ChangeStreamReplicationJob.ts +2 -2
- package/src/replication/JsonBufferWriter.ts +390 -0
- package/src/replication/MongoRelation.ts +54 -25
- package/src/replication/MongoSnapshotQuery.ts +8 -5
- package/src/replication/MongoSnapshotter.ts +729 -0
- package/src/replication/RawChangeStream.ts +460 -0
- package/src/replication/SourceRowConverter.ts +65 -0
- package/src/replication/bufferToSqlite.ts +944 -0
- package/src/replication/internal-mongodb-utils.ts +0 -65
- package/src/replication/replication-index.ts +2 -0
- package/test/src/buffer_to_sqlite.test.ts +1146 -0
- package/test/src/change_stream.test.ts +259 -19
- package/test/src/change_stream_utils.ts +28 -27
- package/test/src/checkpoint_retry.test.ts +131 -0
- package/test/src/mongo_test.test.ts +66 -64
- package/test/src/parse_document_id.test.ts +54 -0
- package/test/src/raw_change_stream.test.ts +547 -0
- package/test/src/resume.test.ts +12 -2
- package/test/src/resuming_snapshots.test.ts +10 -6
- package/test/src/util.ts +56 -3
- package/test/tsconfig.json +0 -1
- package/tsconfig.scripts.json +13 -0
- package/tsconfig.tsbuildinfo +1 -1
- package/test/src/internal_mongodb_utils.test.ts +0 -103
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { mongo } from '@powersync/lib-service-mongodb';
|
|
2
2
|
import {
|
|
3
3
|
container,
|
|
4
4
|
DatabaseConnectionError,
|
|
5
|
-
logger as defaultLogger,
|
|
6
5
|
ErrorCode,
|
|
7
6
|
Logger,
|
|
8
7
|
ReplicationAbortedError,
|
|
@@ -11,6 +10,7 @@ import {
|
|
|
11
10
|
} from '@powersync/lib-services-framework';
|
|
12
11
|
import {
|
|
13
12
|
MetricsEngine,
|
|
13
|
+
PerformanceTracer,
|
|
14
14
|
RelationCache,
|
|
15
15
|
ReplicationLagTracker,
|
|
16
16
|
SaveOperationTag,
|
|
@@ -18,29 +18,23 @@ import {
|
|
|
18
18
|
SourceTable,
|
|
19
19
|
storage
|
|
20
20
|
} from '@powersync/service-core';
|
|
21
|
-
import {
|
|
22
|
-
DatabaseInputRow,
|
|
23
|
-
HydratedSyncRules,
|
|
24
|
-
SqliteInputRow,
|
|
25
|
-
SqliteRow,
|
|
26
|
-
TablePattern
|
|
27
|
-
} from '@powersync/service-sync-rules';
|
|
21
|
+
import { HydratedSyncConfig } from '@powersync/service-sync-rules';
|
|
28
22
|
import { ReplicationMetric } from '@powersync/service-types';
|
|
23
|
+
import { performance } from 'node:perf_hooks';
|
|
29
24
|
import { MongoLSN } from '../common/MongoLSN.js';
|
|
30
25
|
import { PostImagesOption } from '../types/types.js';
|
|
31
26
|
import { escapeRegExp } from '../utils.js';
|
|
32
|
-
import { trackChangeStreamBsonBytes } from './internal-mongodb-utils.js';
|
|
33
27
|
import { MongoManager } from './MongoManager.js';
|
|
28
|
+
import { createCheckpoint, getCacheIdentifier, getMongoRelation, STANDALONE_CHECKPOINT_ID } from './MongoRelation.js';
|
|
29
|
+
import { MongoSnapshotter, MongoSnapshotterHooks } from './MongoSnapshotter.js';
|
|
34
30
|
import {
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
} from './MongoRelation.js';
|
|
41
|
-
import { ChunkedSnapshotQuery } from './MongoSnapshotQuery.js';
|
|
31
|
+
ChangeStreamBatch,
|
|
32
|
+
parseChangeDocument,
|
|
33
|
+
ProjectedChangeStreamDocument,
|
|
34
|
+
rawChangeStream
|
|
35
|
+
} from './RawChangeStream.js';
|
|
42
36
|
import { CHECKPOINTS_COLLECTION, timestampToDate } from './replication-utils.js';
|
|
43
|
-
|
|
37
|
+
import { DirectSourceRowConverter, SourceRowConverter } from './SourceRowConverter.js';
|
|
44
38
|
export interface ChangeStreamOptions {
|
|
45
39
|
connections: MongoManager;
|
|
46
40
|
storage: storage.SyncRulesBucketStorage;
|
|
@@ -59,12 +53,10 @@ export interface ChangeStreamOptions {
|
|
|
59
53
|
*/
|
|
60
54
|
snapshotChunkLength?: number;
|
|
61
55
|
|
|
62
|
-
|
|
63
|
-
|
|
56
|
+
storageHooks?: storage.StorageHooks;
|
|
57
|
+
snapshotHooks?: MongoSnapshotterHooks;
|
|
64
58
|
|
|
65
|
-
|
|
66
|
-
needsInitialSync: boolean;
|
|
67
|
-
snapshotLsn: string | null;
|
|
59
|
+
logger?: Logger;
|
|
68
60
|
}
|
|
69
61
|
|
|
70
62
|
/**
|
|
@@ -82,7 +74,7 @@ export class ChangeStreamInvalidatedError extends DatabaseConnectionError {
|
|
|
82
74
|
}
|
|
83
75
|
|
|
84
76
|
export class ChangeStream {
|
|
85
|
-
sync_rules:
|
|
77
|
+
sync_rules: HydratedSyncConfig;
|
|
86
78
|
group_id: number;
|
|
87
79
|
|
|
88
80
|
connection_id = 1;
|
|
@@ -96,8 +88,15 @@ export class ChangeStream {
|
|
|
96
88
|
|
|
97
89
|
private readonly maxAwaitTimeMS: number;
|
|
98
90
|
|
|
99
|
-
private
|
|
91
|
+
private abortController = new AbortController();
|
|
92
|
+
private abortSignal: AbortSignal = this.abortController.signal;
|
|
93
|
+
|
|
94
|
+
private initPromise: Promise<void> | null = null;
|
|
95
|
+
private snapshotter: MongoSnapshotter;
|
|
100
96
|
|
|
97
|
+
/**
|
|
98
|
+
* We use the relationCache _only_ for caching static SourceTable info, not for snapshot status.
|
|
99
|
+
*/
|
|
101
100
|
private relationCache = new RelationCache(getCacheIdentifier);
|
|
102
101
|
|
|
103
102
|
private replicationLag = new ReplicationLagTracker();
|
|
@@ -110,6 +109,10 @@ export class ChangeStream {
|
|
|
110
109
|
|
|
111
110
|
private changeStreamTimeout: number;
|
|
112
111
|
|
|
112
|
+
private storageHooks: storage.StorageHooks | undefined;
|
|
113
|
+
|
|
114
|
+
private readonly sourceRowConverter: SourceRowConverter;
|
|
115
|
+
|
|
113
116
|
constructor(options: ChangeStreamOptions) {
|
|
114
117
|
this.storage = options.storage;
|
|
115
118
|
this.metrics = options.metrics;
|
|
@@ -117,29 +120,40 @@ export class ChangeStream {
|
|
|
117
120
|
this.connections = options.connections;
|
|
118
121
|
this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 10_000;
|
|
119
122
|
this.snapshotChunkLength = options.snapshotChunkLength ?? 6_000;
|
|
123
|
+
this.storageHooks = options.storageHooks;
|
|
120
124
|
this.client = this.connections.client;
|
|
121
125
|
this.defaultDb = this.connections.db;
|
|
122
126
|
this.sync_rules = options.storage.getParsedSyncRules({
|
|
123
127
|
defaultSchema: this.defaultDb.databaseName
|
|
124
128
|
});
|
|
129
|
+
this.sourceRowConverter = new DirectSourceRowConverter(this.sync_rules.compatibility);
|
|
130
|
+
|
|
125
131
|
// The change stream aggregation command should timeout before the socket times out,
|
|
126
132
|
// so we use 90% of the socket timeout value.
|
|
127
133
|
this.changeStreamTimeout = Math.ceil(this.client.options.socketTimeoutMS * 0.9);
|
|
128
134
|
|
|
129
|
-
this.
|
|
130
|
-
this.
|
|
135
|
+
this.logger = options.logger ?? this.storage.logger;
|
|
136
|
+
this.snapshotter = new MongoSnapshotter({
|
|
137
|
+
...options,
|
|
138
|
+
abortSignal: this.abortSignal,
|
|
139
|
+
logger: this.logger,
|
|
140
|
+
checkpointStreamId: this.checkpointStreamId
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
options.abort_signal.addEventListener(
|
|
131
144
|
'abort',
|
|
132
145
|
() => {
|
|
133
|
-
|
|
146
|
+
this.abortController.abort(options.abort_signal.reason);
|
|
134
147
|
},
|
|
135
148
|
{ once: true }
|
|
136
149
|
);
|
|
137
|
-
|
|
138
|
-
|
|
150
|
+
if (options.abort_signal.aborted) {
|
|
151
|
+
this.abortController.abort(options.abort_signal.reason);
|
|
152
|
+
}
|
|
139
153
|
}
|
|
140
154
|
|
|
141
155
|
get stopped() {
|
|
142
|
-
return this.
|
|
156
|
+
return this.abortSignal.aborted;
|
|
143
157
|
}
|
|
144
158
|
|
|
145
159
|
private get usePostImages() {
|
|
@@ -150,270 +164,6 @@ export class ChangeStream {
|
|
|
150
164
|
return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE;
|
|
151
165
|
}
|
|
152
166
|
|
|
153
|
-
/**
|
|
154
|
-
* This resolves a pattern, persists the related metadata, and returns
|
|
155
|
-
* the resulting SourceTables.
|
|
156
|
-
*
|
|
157
|
-
* This implicitly checks the collection postImage configuration.
|
|
158
|
-
*/
|
|
159
|
-
async resolveQualifiedTableNames(
|
|
160
|
-
batch: storage.BucketStorageBatch,
|
|
161
|
-
tablePattern: TablePattern
|
|
162
|
-
): Promise<storage.SourceTable[]> {
|
|
163
|
-
const schema = tablePattern.schema;
|
|
164
|
-
if (tablePattern.connectionTag != this.connections.connectionTag) {
|
|
165
|
-
return [];
|
|
166
|
-
}
|
|
167
|
-
|
|
168
|
-
let nameFilter: RegExp | string;
|
|
169
|
-
if (tablePattern.isWildcard) {
|
|
170
|
-
nameFilter = new RegExp('^' + escapeRegExp(tablePattern.tablePrefix));
|
|
171
|
-
} else {
|
|
172
|
-
nameFilter = tablePattern.name;
|
|
173
|
-
}
|
|
174
|
-
let result: storage.SourceTable[] = [];
|
|
175
|
-
|
|
176
|
-
// Check if the collection exists
|
|
177
|
-
const collections = await this.client
|
|
178
|
-
.db(schema)
|
|
179
|
-
.listCollections(
|
|
180
|
-
{
|
|
181
|
-
name: nameFilter
|
|
182
|
-
},
|
|
183
|
-
{ nameOnly: false }
|
|
184
|
-
)
|
|
185
|
-
.toArray();
|
|
186
|
-
|
|
187
|
-
if (!tablePattern.isWildcard && collections.length == 0) {
|
|
188
|
-
this.logger.warn(`Collection ${schema}.${tablePattern.name} not found`);
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
for (let collection of collections) {
|
|
192
|
-
const table = await this.handleRelation(
|
|
193
|
-
batch,
|
|
194
|
-
getMongoRelation({ db: schema, coll: collection.name }),
|
|
195
|
-
// This is done as part of the initial setup - snapshot is handled elsewhere
|
|
196
|
-
{ snapshot: false, collectionInfo: collection }
|
|
197
|
-
);
|
|
198
|
-
|
|
199
|
-
result.push(table);
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
return result;
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
async initSlot(): Promise<InitResult> {
|
|
206
|
-
const status = await this.storage.getStatus();
|
|
207
|
-
if (status.snapshot_done && status.checkpoint_lsn) {
|
|
208
|
-
this.logger.info(`Initial replication already done`);
|
|
209
|
-
return { needsInitialSync: false, snapshotLsn: null };
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
return { needsInitialSync: true, snapshotLsn: status.snapshot_lsn };
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
async estimatedCount(table: storage.SourceTable): Promise<string> {
|
|
216
|
-
const count = await this.estimatedCountNumber(table);
|
|
217
|
-
return `~${count}`;
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
async estimatedCountNumber(table: storage.SourceTable): Promise<number> {
|
|
221
|
-
const db = this.client.db(table.schema);
|
|
222
|
-
return await db.collection(table.name).estimatedDocumentCount();
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
/**
|
|
226
|
-
* This gets a LSN before starting a snapshot, which we can resume streaming from after the snapshot.
|
|
227
|
-
*
|
|
228
|
-
* This LSN can survive initial replication restarts.
|
|
229
|
-
*/
|
|
230
|
-
private async getSnapshotLsn(): Promise<string> {
|
|
231
|
-
const hello = await this.defaultDb.command({ hello: 1 });
|
|
232
|
-
// Basic sanity check
|
|
233
|
-
if (hello.msg == 'isdbgrid') {
|
|
234
|
-
throw new ServiceError(
|
|
235
|
-
ErrorCode.PSYNC_S1341,
|
|
236
|
-
'Sharded MongoDB Clusters are not supported yet (including MongoDB Serverless instances).'
|
|
237
|
-
);
|
|
238
|
-
} else if (hello.setName == null) {
|
|
239
|
-
throw new ServiceError(
|
|
240
|
-
ErrorCode.PSYNC_S1342,
|
|
241
|
-
'Standalone MongoDB instances are not supported - use a replicaset.'
|
|
242
|
-
);
|
|
243
|
-
}
|
|
244
|
-
|
|
245
|
-
// Open a change stream just to get a resume token for later use.
|
|
246
|
-
// We could use clusterTime from the hello command, but that won't tell us if the
|
|
247
|
-
// snapshot isn't valid anymore.
|
|
248
|
-
// If we just use the first resumeToken from the stream, we get two potential issues:
|
|
249
|
-
// 1. The resumeToken may just be a wrapped clusterTime, which does not detect changes
|
|
250
|
-
// in source db or other stream issues.
|
|
251
|
-
// 2. The first actual change we get may have the same clusterTime, causing us to incorrect
|
|
252
|
-
// skip that event.
|
|
253
|
-
// Instead, we create a new checkpoint document, and wait until we get that document back in the stream.
|
|
254
|
-
// To avoid potential race conditions with the checkpoint creation, we create a new checkpoint document
|
|
255
|
-
// periodically until the timeout is reached.
|
|
256
|
-
|
|
257
|
-
const LSN_TIMEOUT_SECONDS = 60;
|
|
258
|
-
const LSN_CREATE_INTERVAL_SECONDS = 1;
|
|
259
|
-
|
|
260
|
-
// Create a checkpoint, and open a change stream using startAtOperationTime with the checkpoint's operationTime.
|
|
261
|
-
const firstCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
|
|
262
|
-
await using streamManager = this.openChangeStream({ lsn: firstCheckpointLsn, maxAwaitTimeMs: 0 });
|
|
263
|
-
|
|
264
|
-
const { stream } = streamManager;
|
|
265
|
-
const startTime = performance.now();
|
|
266
|
-
let lastCheckpointCreated = performance.now();
|
|
267
|
-
let eventsSeen = 0;
|
|
268
|
-
|
|
269
|
-
while (performance.now() - startTime < LSN_TIMEOUT_SECONDS * 1000) {
|
|
270
|
-
if (performance.now() - lastCheckpointCreated >= LSN_CREATE_INTERVAL_SECONDS * 1000) {
|
|
271
|
-
await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
|
|
272
|
-
lastCheckpointCreated = performance.now();
|
|
273
|
-
}
|
|
274
|
-
|
|
275
|
-
// tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream
|
|
276
|
-
const changeDocument = await stream.tryNext().catch((e) => {
|
|
277
|
-
throw mapChangeStreamError(e);
|
|
278
|
-
});
|
|
279
|
-
if (changeDocument == null) {
|
|
280
|
-
continue;
|
|
281
|
-
}
|
|
282
|
-
|
|
283
|
-
const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
|
|
284
|
-
|
|
285
|
-
if (ns?.coll == CHECKPOINTS_COLLECTION && 'documentKey' in changeDocument) {
|
|
286
|
-
const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId;
|
|
287
|
-
if (!this.checkpointStreamId.equals(checkpointId)) {
|
|
288
|
-
continue;
|
|
289
|
-
}
|
|
290
|
-
const { comparable: lsn } = new MongoLSN({
|
|
291
|
-
timestamp: changeDocument.clusterTime!,
|
|
292
|
-
resume_token: changeDocument._id
|
|
293
|
-
});
|
|
294
|
-
return lsn;
|
|
295
|
-
}
|
|
296
|
-
|
|
297
|
-
eventsSeen += 1;
|
|
298
|
-
}
|
|
299
|
-
|
|
300
|
-
// Could happen if there is a very large replication lag?
|
|
301
|
-
throw new ServiceError(
|
|
302
|
-
ErrorCode.PSYNC_S1301,
|
|
303
|
-
`Timeout after while waiting for checkpoint document for ${LSN_TIMEOUT_SECONDS}s. Streamed events = ${eventsSeen}`
|
|
304
|
-
);
|
|
305
|
-
}
|
|
306
|
-
|
|
307
|
-
/**
|
|
308
|
-
* Given a snapshot LSN, validate that we can read from it, by opening a change stream.
|
|
309
|
-
*/
|
|
310
|
-
private async validateSnapshotLsn(lsn: string) {
|
|
311
|
-
await using streamManager = this.openChangeStream({ lsn: lsn, maxAwaitTimeMs: 0 });
|
|
312
|
-
const { stream } = streamManager;
|
|
313
|
-
try {
|
|
314
|
-
// tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream
|
|
315
|
-
await stream.tryNext();
|
|
316
|
-
} catch (e) {
|
|
317
|
-
// Note: A timeout here is not handled as a ChangeStreamInvalidatedError, even though
|
|
318
|
-
// we possibly cannot recover from it.
|
|
319
|
-
throw mapChangeStreamError(e);
|
|
320
|
-
}
|
|
321
|
-
}
|
|
322
|
-
|
|
323
|
-
async initialReplication(snapshotLsn: string | null) {
|
|
324
|
-
const sourceTables = this.sync_rules.getSourceTables();
|
|
325
|
-
await this.client.connect();
|
|
326
|
-
|
|
327
|
-
const flushResult = await this.storage.startBatch(
|
|
328
|
-
{
|
|
329
|
-
logger: this.logger,
|
|
330
|
-
zeroLSN: MongoLSN.ZERO.comparable,
|
|
331
|
-
defaultSchema: this.defaultDb.databaseName,
|
|
332
|
-
storeCurrentData: false,
|
|
333
|
-
skipExistingRows: true
|
|
334
|
-
},
|
|
335
|
-
async (batch) => {
|
|
336
|
-
if (snapshotLsn == null) {
|
|
337
|
-
// First replication attempt - get a snapshot and store the timestamp
|
|
338
|
-
snapshotLsn = await this.getSnapshotLsn();
|
|
339
|
-
await batch.setResumeLsn(snapshotLsn);
|
|
340
|
-
this.logger.info(`Marking snapshot at ${snapshotLsn}`);
|
|
341
|
-
} else {
|
|
342
|
-
this.logger.info(`Resuming snapshot at ${snapshotLsn}`);
|
|
343
|
-
// Check that the snapshot is still valid.
|
|
344
|
-
await this.validateSnapshotLsn(snapshotLsn);
|
|
345
|
-
}
|
|
346
|
-
|
|
347
|
-
// Start by resolving all tables.
|
|
348
|
-
// This checks postImage configuration, and that should fail as
|
|
349
|
-
// early as possible.
|
|
350
|
-
let allSourceTables: SourceTable[] = [];
|
|
351
|
-
for (let tablePattern of sourceTables) {
|
|
352
|
-
const tables = await this.resolveQualifiedTableNames(batch, tablePattern);
|
|
353
|
-
allSourceTables.push(...tables);
|
|
354
|
-
}
|
|
355
|
-
|
|
356
|
-
let tablesWithStatus: SourceTable[] = [];
|
|
357
|
-
for (let table of allSourceTables) {
|
|
358
|
-
if (table.snapshotComplete) {
|
|
359
|
-
this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`);
|
|
360
|
-
continue;
|
|
361
|
-
}
|
|
362
|
-
let count = await this.estimatedCountNumber(table);
|
|
363
|
-
const updated = await batch.updateTableProgress(table, {
|
|
364
|
-
totalEstimatedCount: count
|
|
365
|
-
});
|
|
366
|
-
tablesWithStatus.push(updated);
|
|
367
|
-
this.relationCache.update(updated);
|
|
368
|
-
this.logger.info(
|
|
369
|
-
`To replicate: ${table.qualifiedName}: ${updated.snapshotStatus?.replicatedCount}/~${updated.snapshotStatus?.totalEstimatedCount}`
|
|
370
|
-
);
|
|
371
|
-
}
|
|
372
|
-
|
|
373
|
-
for (let table of tablesWithStatus) {
|
|
374
|
-
await this.snapshotTable(batch, table);
|
|
375
|
-
await batch.markTableSnapshotDone([table]);
|
|
376
|
-
|
|
377
|
-
this.touch();
|
|
378
|
-
}
|
|
379
|
-
|
|
380
|
-
// The checkpoint here is a marker - we need to replicate up to at least this
|
|
381
|
-
// point before the data can be considered consistent.
|
|
382
|
-
// We could do this for each individual table, but may as well just do it once for the entire snapshot.
|
|
383
|
-
const checkpoint = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);
|
|
384
|
-
await batch.markAllSnapshotDone(checkpoint);
|
|
385
|
-
|
|
386
|
-
// This will not create a consistent checkpoint yet, but will persist the op.
|
|
387
|
-
// Actual checkpoint will be created when streaming replication caught up.
|
|
388
|
-
await batch.commit(snapshotLsn);
|
|
389
|
-
|
|
390
|
-
this.logger.info(`Snapshot done. Need to replicate from ${snapshotLsn} to ${checkpoint} to be consistent`);
|
|
391
|
-
}
|
|
392
|
-
);
|
|
393
|
-
return { lastOpId: flushResult?.flushed_op };
|
|
394
|
-
}
|
|
395
|
-
|
|
396
|
-
private async setupCheckpointsCollection() {
|
|
397
|
-
const collection = await this.getCollectionInfo(this.defaultDb.databaseName, CHECKPOINTS_COLLECTION);
|
|
398
|
-
if (collection == null) {
|
|
399
|
-
await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, {
|
|
400
|
-
changeStreamPreAndPostImages: { enabled: true }
|
|
401
|
-
});
|
|
402
|
-
} else if (this.usePostImages && collection.options?.changeStreamPreAndPostImages?.enabled != true) {
|
|
403
|
-
// Drop + create requires less permissions than collMod,
|
|
404
|
-
// and we don't care about the data in this collection.
|
|
405
|
-
await this.defaultDb.dropCollection(CHECKPOINTS_COLLECTION);
|
|
406
|
-
await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, {
|
|
407
|
-
changeStreamPreAndPostImages: { enabled: true }
|
|
408
|
-
});
|
|
409
|
-
} else {
|
|
410
|
-
// Clear the collection on startup, to keep it clean
|
|
411
|
-
// We never query this collection directly, and don't want to keep the data around.
|
|
412
|
-
// We only use this to get data into the oplog/changestream.
|
|
413
|
-
await this.defaultDb.collection(CHECKPOINTS_COLLECTION).deleteMany({});
|
|
414
|
-
}
|
|
415
|
-
}
|
|
416
|
-
|
|
417
167
|
private getSourceNamespaceFilters(): { $match: any; multipleDatabases: boolean } {
|
|
418
168
|
const sourceTables = this.sync_rules.getSourceTables();
|
|
419
169
|
|
|
@@ -465,95 +215,14 @@ export class ChangeStream {
|
|
|
465
215
|
return { $match: nsFilter, multipleDatabases };
|
|
466
216
|
}
|
|
467
217
|
|
|
468
|
-
|
|
469
|
-
for (let row of results) {
|
|
470
|
-
yield constructAfterRecord(row);
|
|
471
|
-
}
|
|
472
|
-
}
|
|
473
|
-
|
|
474
|
-
private async snapshotTable(batch: storage.BucketStorageBatch, table: storage.SourceTable) {
|
|
475
|
-
const rowsReplicatedMetric = this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED);
|
|
476
|
-
const bytesReplicatedMetric = this.metrics.getCounter(ReplicationMetric.DATA_REPLICATED_BYTES);
|
|
477
|
-
const chunksReplicatedMetric = this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED);
|
|
478
|
-
|
|
479
|
-
const totalEstimatedCount = await this.estimatedCountNumber(table);
|
|
480
|
-
let at = table.snapshotStatus?.replicatedCount ?? 0;
|
|
481
|
-
const db = this.client.db(table.schema);
|
|
482
|
-
const collection = db.collection(table.name);
|
|
483
|
-
await using query = new ChunkedSnapshotQuery({
|
|
484
|
-
collection,
|
|
485
|
-
key: table.snapshotStatus?.lastKey,
|
|
486
|
-
batchSize: this.snapshotChunkLength
|
|
487
|
-
});
|
|
488
|
-
if (query.lastKey != null) {
|
|
489
|
-
this.logger.info(
|
|
490
|
-
`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming at _id > ${query.lastKey}`
|
|
491
|
-
);
|
|
492
|
-
} else {
|
|
493
|
-
this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
|
|
494
|
-
}
|
|
495
|
-
|
|
496
|
-
let lastBatch = performance.now();
|
|
497
|
-
let nextChunkPromise = query.nextChunk();
|
|
498
|
-
while (true) {
|
|
499
|
-
const { docs: docBatch, lastKey, bytes: chunkBytes } = await nextChunkPromise;
|
|
500
|
-
if (docBatch.length == 0) {
|
|
501
|
-
// No more data - stop iterating
|
|
502
|
-
break;
|
|
503
|
-
}
|
|
504
|
-
bytesReplicatedMetric.add(chunkBytes);
|
|
505
|
-
chunksReplicatedMetric.add(1);
|
|
506
|
-
|
|
507
|
-
if (this.abort_signal.aborted) {
|
|
508
|
-
throw new ReplicationAbortedError(`Aborted initial replication`, this.abort_signal.reason);
|
|
509
|
-
}
|
|
510
|
-
|
|
511
|
-
// Pre-fetch next batch, so that we can read and write concurrently
|
|
512
|
-
nextChunkPromise = query.nextChunk();
|
|
513
|
-
for (let document of docBatch) {
|
|
514
|
-
const record = this.constructAfterRecord(document);
|
|
515
|
-
|
|
516
|
-
// This auto-flushes when the batch reaches its size limit
|
|
517
|
-
await batch.save({
|
|
518
|
-
tag: SaveOperationTag.INSERT,
|
|
519
|
-
sourceTable: table,
|
|
520
|
-
before: undefined,
|
|
521
|
-
beforeReplicaId: undefined,
|
|
522
|
-
after: record,
|
|
523
|
-
afterReplicaId: document._id
|
|
524
|
-
});
|
|
525
|
-
}
|
|
526
|
-
|
|
527
|
-
// Important: flush before marking progress
|
|
528
|
-
await batch.flush();
|
|
529
|
-
at += docBatch.length;
|
|
530
|
-
rowsReplicatedMetric.add(docBatch.length);
|
|
531
|
-
|
|
532
|
-
table = await batch.updateTableProgress(table, {
|
|
533
|
-
lastKey,
|
|
534
|
-
replicatedCount: at,
|
|
535
|
-
totalEstimatedCount: totalEstimatedCount
|
|
536
|
-
});
|
|
537
|
-
this.relationCache.update(table);
|
|
538
|
-
|
|
539
|
-
const duration = performance.now() - lastBatch;
|
|
540
|
-
lastBatch = performance.now();
|
|
541
|
-
this.logger.info(
|
|
542
|
-
`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} in ${duration.toFixed(0)}ms`
|
|
543
|
-
);
|
|
544
|
-
this.touch();
|
|
545
|
-
}
|
|
546
|
-
// In case the loop was interrupted, make sure we await the last promise.
|
|
547
|
-
await nextChunkPromise;
|
|
548
|
-
}
|
|
549
|
-
|
|
550
|
-
private async getRelation(
|
|
218
|
+
private async getRelations(
|
|
551
219
|
batch: storage.BucketStorageBatch,
|
|
552
220
|
descriptor: SourceEntityDescriptor,
|
|
553
221
|
options: { snapshot: boolean }
|
|
554
|
-
): Promise<SourceTable> {
|
|
555
|
-
const existing = this.relationCache.
|
|
222
|
+
): Promise<SourceTable[]> {
|
|
223
|
+
const existing = this.relationCache.getAll(descriptor);
|
|
556
224
|
if (existing != null) {
|
|
225
|
+
// We do this even when it's an empty result: Empty means nothing to sync, and we don't need to re-resolve.
|
|
557
226
|
return existing;
|
|
558
227
|
}
|
|
559
228
|
|
|
@@ -611,14 +280,11 @@ export class ChangeStream {
|
|
|
611
280
|
}
|
|
612
281
|
|
|
613
282
|
const snapshot = options.snapshot;
|
|
614
|
-
const result = await
|
|
615
|
-
group_id: this.group_id,
|
|
283
|
+
const result = await batch.resolveTables({
|
|
616
284
|
connection_id: this.connection_id,
|
|
617
|
-
|
|
618
|
-
entity_descriptor: descriptor,
|
|
619
|
-
sync_rules: this.sync_rules
|
|
285
|
+
source: descriptor
|
|
620
286
|
});
|
|
621
|
-
this.relationCache.
|
|
287
|
+
this.relationCache.updateAll(descriptor, result.tables);
|
|
622
288
|
|
|
623
289
|
// Drop conflicting collections.
|
|
624
290
|
// This is generally not expected for MongoDB source dbs, so we log an error.
|
|
@@ -632,47 +298,38 @@ export class ChangeStream {
|
|
|
632
298
|
// Snapshot if:
|
|
633
299
|
// 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere)
|
|
634
300
|
// 2. Snapshot is not already done, AND:
|
|
635
|
-
// 3. The table is used in sync
|
|
636
|
-
const
|
|
637
|
-
if (
|
|
301
|
+
// 3. The table is used in sync config.
|
|
302
|
+
const snapshotCandidates = result.tables.filter((table) => snapshot && !table.snapshotComplete && table.syncAny);
|
|
303
|
+
if (snapshotCandidates.length > 0) {
|
|
638
304
|
this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`);
|
|
639
|
-
|
|
640
|
-
await batch.truncate([result.table]);
|
|
641
|
-
|
|
642
|
-
await this.snapshotTable(batch, result.table);
|
|
643
|
-
const no_checkpoint_before_lsn = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);
|
|
644
|
-
|
|
645
|
-
const [table] = await batch.markTableSnapshotDone([result.table], no_checkpoint_before_lsn);
|
|
646
|
-
return table;
|
|
305
|
+
await this.snapshotter.snapshotTables(batch, snapshotCandidates);
|
|
647
306
|
}
|
|
648
307
|
|
|
649
|
-
return result.
|
|
650
|
-
}
|
|
651
|
-
|
|
652
|
-
private constructAfterRecord(document: mongo.Document): SqliteRow {
|
|
653
|
-
const inputRow = constructAfterRecord(document);
|
|
654
|
-
return this.sync_rules.applyRowContext<never>(inputRow);
|
|
308
|
+
return result.tables;
|
|
655
309
|
}
|
|
656
310
|
|
|
657
311
|
async writeChange(
|
|
658
312
|
batch: storage.BucketStorageBatch,
|
|
659
313
|
table: storage.SourceTable,
|
|
660
|
-
change:
|
|
314
|
+
change: ProjectedChangeStreamDocument
|
|
661
315
|
): Promise<storage.FlushedResult | null> {
|
|
662
316
|
if (!table.syncAny) {
|
|
663
|
-
this.logger.debug(`Collection ${table.qualifiedName} not used in sync
|
|
317
|
+
this.logger.debug(`Collection ${table.qualifiedName} not used in sync config - skipping`);
|
|
664
318
|
return null;
|
|
665
319
|
}
|
|
666
320
|
|
|
667
321
|
this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1);
|
|
668
322
|
if (change.operationType == 'insert') {
|
|
669
|
-
const baseRecord = this.
|
|
323
|
+
const { row: baseRecord, replicaId: _replicaId } = this.rawToSqliteRow(change.fullDocument);
|
|
670
324
|
return await batch.save({
|
|
671
325
|
tag: SaveOperationTag.INSERT,
|
|
672
326
|
sourceTable: table,
|
|
673
327
|
before: undefined,
|
|
674
328
|
beforeReplicaId: undefined,
|
|
675
329
|
after: baseRecord,
|
|
330
|
+
// Same as _replicaId
|
|
331
|
+
// We specifically need to use the source _id, not the converted one in baseRecord,
|
|
332
|
+
// to preserve _id uniqueness properties.
|
|
676
333
|
afterReplicaId: change.documentKey._id
|
|
677
334
|
});
|
|
678
335
|
} else if (change.operationType == 'update' || change.operationType == 'replace') {
|
|
@@ -685,14 +342,14 @@ export class ChangeStream {
|
|
|
685
342
|
beforeReplicaId: change.documentKey._id
|
|
686
343
|
});
|
|
687
344
|
}
|
|
688
|
-
const after = this.
|
|
345
|
+
const { row: after, replicaId: _replicaId } = this.rawToSqliteRow(change.fullDocument!);
|
|
689
346
|
return await batch.save({
|
|
690
347
|
tag: SaveOperationTag.UPDATE,
|
|
691
348
|
sourceTable: table,
|
|
692
349
|
before: undefined,
|
|
693
350
|
beforeReplicaId: undefined,
|
|
694
351
|
after: after,
|
|
695
|
-
afterReplicaId: change.documentKey._id
|
|
352
|
+
afterReplicaId: change.documentKey._id // Same as _replicaId
|
|
696
353
|
});
|
|
697
354
|
} else if (change.operationType == 'delete') {
|
|
698
355
|
return await batch.save({
|
|
@@ -707,38 +364,65 @@ export class ChangeStream {
|
|
|
707
364
|
}
|
|
708
365
|
|
|
709
366
|
async replicate() {
|
|
367
|
+
let streamPromise: Promise<void> | null = null;
|
|
368
|
+
let loopPromise: Promise<void> | null = null;
|
|
710
369
|
try {
|
|
711
370
|
// If anything errors here, the entire replication process is halted, and
|
|
712
371
|
// all connections automatically closed, including this one.
|
|
713
|
-
|
|
714
|
-
await this.
|
|
372
|
+
this.initPromise = this.initReplication();
|
|
373
|
+
await this.initPromise;
|
|
374
|
+
loopPromise = this.snapshotter
|
|
375
|
+
.replicationLoop()
|
|
376
|
+
.then(() => {
|
|
377
|
+
throw new ReplicationAssertionError(`Replication snapshotter exited unexpectedly`);
|
|
378
|
+
})
|
|
379
|
+
.catch((e) => {
|
|
380
|
+
this.abortController.abort(e);
|
|
381
|
+
throw e;
|
|
382
|
+
});
|
|
383
|
+
if (!this.snapshotter.supportsConcurrentSnapshots) {
|
|
384
|
+
await Promise.race([this.snapshotter.waitForInitialSnapshot(), loopPromise]);
|
|
385
|
+
}
|
|
386
|
+
streamPromise = this.streamChanges()
|
|
387
|
+
.then(() => {
|
|
388
|
+
throw new ReplicationAssertionError(`Replication stream exited unexpectedly`);
|
|
389
|
+
})
|
|
390
|
+
.catch((e) => {
|
|
391
|
+
this.abortController.abort(e);
|
|
392
|
+
throw e;
|
|
393
|
+
});
|
|
394
|
+
|
|
395
|
+
const results = await Promise.allSettled([loopPromise, streamPromise]);
|
|
396
|
+
throw replicationLoopError(results);
|
|
715
397
|
} catch (e) {
|
|
716
398
|
await this.storage.reportError(e);
|
|
717
399
|
throw e;
|
|
400
|
+
} finally {
|
|
401
|
+
this.abortController.abort();
|
|
402
|
+
}
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
public async waitForInitialSnapshot() {
|
|
406
|
+
if (this.initPromise == null) {
|
|
407
|
+
throw new ReplicationAssertionError('replicate() must be called before waitForInitialSnapshot()');
|
|
718
408
|
}
|
|
409
|
+
await this.initPromise;
|
|
410
|
+
await this.snapshotter.waitForInitialSnapshot();
|
|
719
411
|
}
|
|
720
412
|
|
|
721
|
-
async initReplication() {
|
|
722
|
-
const result = await this.
|
|
723
|
-
await this.setupCheckpointsCollection();
|
|
413
|
+
private async initReplication() {
|
|
414
|
+
const result = await this.snapshotter.checkSlot();
|
|
415
|
+
await this.snapshotter.setupCheckpointsCollection();
|
|
724
416
|
if (result.needsInitialSync) {
|
|
725
417
|
if (result.snapshotLsn == null) {
|
|
726
418
|
// Snapshot LSN is not present, so we need to start replication from scratch.
|
|
727
|
-
await this.storage.clear({ signal: this.
|
|
728
|
-
}
|
|
729
|
-
const { lastOpId } = await this.initialReplication(result.snapshotLsn);
|
|
730
|
-
if (lastOpId != null) {
|
|
731
|
-
// Populate the cache _after_ initial replication, but _before_ we switch to this sync rules.
|
|
732
|
-
await this.storage.populatePersistentChecksumCache({
|
|
733
|
-
signal: this.abort_signal,
|
|
734
|
-
// No checkpoint yet, but we do have the opId.
|
|
735
|
-
maxOpId: lastOpId
|
|
736
|
-
});
|
|
419
|
+
await this.storage.clear({ signal: this.abortSignal });
|
|
737
420
|
}
|
|
421
|
+
await this.snapshotter.queueSnapshotTables(result.snapshotLsn);
|
|
738
422
|
}
|
|
739
423
|
}
|
|
740
424
|
|
|
741
|
-
async streamChanges() {
|
|
425
|
+
private async streamChanges() {
|
|
742
426
|
try {
|
|
743
427
|
await this.streamChangesInternal();
|
|
744
428
|
} catch (e) {
|
|
@@ -753,19 +437,19 @@ export class ChangeStream {
|
|
|
753
437
|
}
|
|
754
438
|
}
|
|
755
439
|
|
|
756
|
-
private
|
|
440
|
+
private rawChangeStreamBatches(options: {
|
|
441
|
+
lsn: string | null;
|
|
442
|
+
maxAwaitTimeMS?: number;
|
|
443
|
+
batchSize?: number;
|
|
444
|
+
filters: { $match: any; multipleDatabases: boolean };
|
|
445
|
+
signal?: AbortSignal;
|
|
446
|
+
tracer?: PerformanceTracer<'changestream'>;
|
|
447
|
+
}): AsyncIterableIterator<ChangeStreamBatch> {
|
|
757
448
|
const lastLsn = options.lsn ? MongoLSN.fromSerialized(options.lsn) : null;
|
|
758
449
|
const startAfter = lastLsn?.timestamp;
|
|
759
450
|
const resumeAfter = lastLsn?.resumeToken;
|
|
760
451
|
|
|
761
|
-
const filters =
|
|
762
|
-
|
|
763
|
-
const pipeline: mongo.Document[] = [
|
|
764
|
-
{
|
|
765
|
-
$match: filters.$match
|
|
766
|
-
},
|
|
767
|
-
{ $changeStreamSplitLargeEvent: {} }
|
|
768
|
-
];
|
|
452
|
+
const filters = options.filters;
|
|
769
453
|
|
|
770
454
|
let fullDocument: 'required' | 'updateLookup';
|
|
771
455
|
|
|
@@ -777,12 +461,19 @@ export class ChangeStream {
|
|
|
777
461
|
} else {
|
|
778
462
|
fullDocument = 'updateLookup';
|
|
779
463
|
}
|
|
780
|
-
const streamOptions: mongo.ChangeStreamOptions = {
|
|
464
|
+
const streamOptions: mongo.ChangeStreamOptions & mongo.Document = {
|
|
781
465
|
showExpandedEvents: true,
|
|
782
|
-
|
|
783
|
-
fullDocument: fullDocument,
|
|
784
|
-
maxTimeMS: this.changeStreamTimeout
|
|
466
|
+
fullDocument: fullDocument
|
|
785
467
|
};
|
|
468
|
+
const pipeline: mongo.Document[] = [
|
|
469
|
+
{
|
|
470
|
+
$changeStream: streamOptions
|
|
471
|
+
},
|
|
472
|
+
{
|
|
473
|
+
$match: filters.$match
|
|
474
|
+
},
|
|
475
|
+
{ $changeStreamSplitLargeEvent: {} }
|
|
476
|
+
];
|
|
786
477
|
|
|
787
478
|
/**
|
|
788
479
|
* Only one of these options can be supplied at a time.
|
|
@@ -796,45 +487,27 @@ export class ChangeStream {
|
|
|
796
487
|
streamOptions.startAtOperationTime = startAfter;
|
|
797
488
|
}
|
|
798
489
|
|
|
799
|
-
let
|
|
490
|
+
let watchDb: mongo.Db;
|
|
800
491
|
if (filters.multipleDatabases) {
|
|
801
|
-
|
|
802
|
-
|
|
492
|
+
watchDb = this.client.db('admin');
|
|
493
|
+
streamOptions.allChangesForCluster = true;
|
|
803
494
|
} else {
|
|
804
|
-
|
|
805
|
-
stream = this.defaultDb.watch(pipeline, streamOptions);
|
|
495
|
+
watchDb = this.defaultDb;
|
|
806
496
|
}
|
|
807
497
|
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
498
|
+
return rawChangeStream(watchDb, pipeline, {
|
|
499
|
+
batchSize: options.batchSize ?? this.snapshotChunkLength,
|
|
500
|
+
maxAwaitTimeMS: options.maxAwaitTimeMS ?? this.maxAwaitTimeMS,
|
|
501
|
+
maxTimeMS: this.changeStreamTimeout,
|
|
811
502
|
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
return stream.close();
|
|
817
|
-
}
|
|
818
|
-
};
|
|
503
|
+
signal: options.signal,
|
|
504
|
+
logger: this.logger,
|
|
505
|
+
tracer: options.tracer
|
|
506
|
+
});
|
|
819
507
|
}
|
|
820
508
|
|
|
821
|
-
private
|
|
822
|
-
|
|
823
|
-
// not expose that through the public ChangeStream API. We use this to detect backlog
|
|
824
|
-
// building up before we have processed the corresponding source changes locally.
|
|
825
|
-
// If the driver API changes, we'll have a hard error here.
|
|
826
|
-
// We specifically want to avoid a silent performance regression if the driver behavior changes.
|
|
827
|
-
const cursor = (
|
|
828
|
-
stream as mongo.ChangeStream<mongo.Document> & {
|
|
829
|
-
cursor: mongo.AbstractCursor<mongo.ChangeStreamDocument<mongo.Document>>;
|
|
830
|
-
}
|
|
831
|
-
).cursor;
|
|
832
|
-
if (cursor == null || typeof cursor.bufferedCount != 'function') {
|
|
833
|
-
throw new ReplicationAssertionError(
|
|
834
|
-
'MongoDB ChangeStream no longer exposes an internal cursor with bufferedCount'
|
|
835
|
-
);
|
|
836
|
-
}
|
|
837
|
-
return cursor.bufferedCount();
|
|
509
|
+
private rawToSqliteRow(row: Buffer) {
|
|
510
|
+
return this.sourceRowConverter.rawToSqliteRow(row);
|
|
838
511
|
}
|
|
839
512
|
|
|
840
513
|
async streamChangesInternal() {
|
|
@@ -842,13 +515,18 @@ export class ChangeStream {
|
|
|
842
515
|
const bytesReplicatedMetric = this.metrics.getCounter(ReplicationMetric.DATA_REPLICATED_BYTES);
|
|
843
516
|
const chunksReplicatedMetric = this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED);
|
|
844
517
|
|
|
518
|
+
const tracer = new PerformanceTracer<
|
|
519
|
+
'storage' | 'evaluate' | 'batch' | 'source_checkpoint' | 'changestream' | 'processing'
|
|
520
|
+
>('MongoDB streaming replication');
|
|
845
521
|
await this.storage.startBatch(
|
|
846
522
|
{
|
|
847
523
|
logger: this.logger,
|
|
848
524
|
zeroLSN: MongoLSN.ZERO.comparable,
|
|
849
525
|
defaultSchema: this.defaultDb.databaseName,
|
|
850
526
|
// We get a complete postimage for every change, so we don't need to store the current data.
|
|
851
|
-
storeCurrentData: false
|
|
527
|
+
storeCurrentData: false,
|
|
528
|
+
hooks: this.storageHooks,
|
|
529
|
+
tracer
|
|
852
530
|
},
|
|
853
531
|
async (batch) => {
|
|
854
532
|
const { resumeFromLsn } = batch;
|
|
@@ -857,6 +535,7 @@ export class ChangeStream {
|
|
|
857
535
|
}
|
|
858
536
|
const lastLsn = MongoLSN.fromSerialized(resumeFromLsn);
|
|
859
537
|
const startAfter = lastLsn?.timestamp;
|
|
538
|
+
let outerSpan = tracer.span('batch');
|
|
860
539
|
|
|
861
540
|
// It is normal for this to be a minute or two old when there is a low volume
|
|
862
541
|
// of ChangeStream events.
|
|
@@ -864,16 +543,13 @@ export class ChangeStream {
|
|
|
864
543
|
|
|
865
544
|
this.logger.info(`Resume streaming at ${startAfter?.inspect()} / ${lastLsn} | Token age: ${tokenAgeSeconds}s`);
|
|
866
545
|
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
bytesReplicatedMetric.add(bytes);
|
|
875
|
-
// Each of these represent a single response message from MongoDB.
|
|
876
|
-
chunksReplicatedMetric.add(1);
|
|
546
|
+
const filters = this.getSourceNamespaceFilters();
|
|
547
|
+
// This is closed when the for loop below returns/breaks/throws
|
|
548
|
+
const batchStream = this.rawChangeStreamBatches({
|
|
549
|
+
lsn: resumeFromLsn,
|
|
550
|
+
filters,
|
|
551
|
+
signal: this.abortSignal,
|
|
552
|
+
tracer
|
|
877
553
|
});
|
|
878
554
|
|
|
879
555
|
// Always start with a checkpoint.
|
|
@@ -885,44 +561,30 @@ export class ChangeStream {
|
|
|
885
561
|
this.checkpointStreamId
|
|
886
562
|
);
|
|
887
563
|
|
|
888
|
-
let splitDocument:
|
|
564
|
+
let splitDocument: ProjectedChangeStreamDocument | null = null;
|
|
889
565
|
|
|
890
566
|
let flexDbNameWorkaroundLogged = false;
|
|
891
|
-
let changesSinceLastCheckpoint = 0;
|
|
892
567
|
|
|
893
568
|
let lastEmptyResume = performance.now();
|
|
894
569
|
let lastTxnKey: string | null = null;
|
|
895
570
|
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
}
|
|
571
|
+
for await (let eventBatch of batchStream) {
|
|
572
|
+
const { events, resumeToken } = eventBatch;
|
|
573
|
+
using batchSpan = tracer.span('processing');
|
|
900
574
|
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
// The stream was closed, we will only ever receive `null` from it
|
|
905
|
-
if (!originalChangeDocument && stream.closed) {
|
|
906
|
-
break;
|
|
907
|
-
}
|
|
908
|
-
|
|
909
|
-
if (this.abort_signal.aborted) {
|
|
575
|
+
bytesReplicatedMetric.add(eventBatch.byteSize);
|
|
576
|
+
chunksReplicatedMetric.add(1);
|
|
577
|
+
if (this.abortSignal.aborted) {
|
|
910
578
|
break;
|
|
911
579
|
}
|
|
912
|
-
|
|
913
|
-
if (
|
|
914
|
-
//
|
|
915
|
-
//
|
|
916
|
-
// stream.resumeToken is not updated if stream.tryNext() returns data, while stream.next()
|
|
917
|
-
// does update it.
|
|
918
|
-
// From observed behavior, the actual resumeToken changes around once every 10 seconds.
|
|
580
|
+
this.touch();
|
|
581
|
+
if (events.length == 0) {
|
|
582
|
+
// No changes in this batch, but we still want to keep the connection alive.
|
|
583
|
+
// We do this by persisting a keepalive checkpoint.
|
|
919
584
|
// If we don't update it on empty events, we do keep consistency, but resuming the stream
|
|
920
585
|
// with old tokens may cause connection timeouts.
|
|
921
|
-
// We throttle this further by only persisting a keepalive once a minute.
|
|
922
|
-
// We add an additional check for waitForCheckpointLsn == null, to make sure we're not
|
|
923
|
-
// doing a keepalive in the middle of a transaction.
|
|
924
586
|
if (waitForCheckpointLsn == null && performance.now() - lastEmptyResume > 60_000) {
|
|
925
|
-
const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(
|
|
587
|
+
const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(resumeToken);
|
|
926
588
|
await batch.keepalive(lsn);
|
|
927
589
|
this.touch();
|
|
928
590
|
lastEmptyResume = performance.now();
|
|
@@ -933,229 +595,267 @@ export class ChangeStream {
|
|
|
933
595
|
);
|
|
934
596
|
this.replicationLag.markStarted();
|
|
935
597
|
}
|
|
936
|
-
continue;
|
|
937
|
-
}
|
|
938
598
|
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
599
|
+
// If we have no changes, we can just persist the keepalive.
|
|
600
|
+
// This is throttled to once per minute.
|
|
601
|
+
if (performance.now() - lastEmptyResume < 60_000) {
|
|
602
|
+
continue;
|
|
603
|
+
}
|
|
943
604
|
}
|
|
944
605
|
|
|
945
|
-
|
|
946
|
-
if (originalChangeDocument?.splitEvent != null) {
|
|
947
|
-
// Handle split events from $changeStreamSplitLargeEvent.
|
|
948
|
-
// This is only relevant for very large update operations.
|
|
949
|
-
const splitEvent = originalChangeDocument?.splitEvent;
|
|
606
|
+
this.touch();
|
|
950
607
|
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
608
|
+
for (let eventIndex = 0; eventIndex < events.length; eventIndex++) {
|
|
609
|
+
const rawChangeDocument = events[eventIndex];
|
|
610
|
+
const originalChangeDocument = parseChangeDocument(rawChangeDocument);
|
|
611
|
+
if (this.abortSignal.aborted) {
|
|
612
|
+
break;
|
|
955
613
|
}
|
|
956
614
|
|
|
957
|
-
if (
|
|
958
|
-
// Got all fragments
|
|
959
|
-
changeDocument = splitDocument;
|
|
960
|
-
splitDocument = null;
|
|
961
|
-
} else {
|
|
962
|
-
// Wait for more fragments
|
|
615
|
+
if (startAfter != null && originalChangeDocument.clusterTime?.lte(startAfter)) {
|
|
963
616
|
continue;
|
|
964
617
|
}
|
|
965
|
-
} else if (splitDocument != null) {
|
|
966
|
-
// We were waiting for fragments, but got a different event
|
|
967
|
-
throw new ReplicationAssertionError(`Incomplete splitEvent: ${JSON.stringify(splitDocument.splitEvent)}`);
|
|
968
|
-
}
|
|
969
618
|
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
) {
|
|
976
|
-
// When all of the following conditions are met:
|
|
977
|
-
// 1. We're replicating from an Atlas Flex instance.
|
|
978
|
-
// 2. There were changestream events recorded while the PowerSync service is paused.
|
|
979
|
-
// 3. We're only replicating from a single database.
|
|
980
|
-
// Then we've observed an ns with for example {db: '67b83e86cd20730f1e766dde_ps'},
|
|
981
|
-
// instead of the expected {db: 'ps'}.
|
|
982
|
-
// We correct this.
|
|
983
|
-
changeDocument.ns.db = this.defaultDb.databaseName;
|
|
984
|
-
|
|
985
|
-
if (!flexDbNameWorkaroundLogged) {
|
|
986
|
-
flexDbNameWorkaroundLogged = true;
|
|
987
|
-
this.logger.warn(
|
|
988
|
-
`Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.`
|
|
989
|
-
);
|
|
990
|
-
}
|
|
991
|
-
}
|
|
619
|
+
let changeDocument = originalChangeDocument;
|
|
620
|
+
if (originalChangeDocument?.splitEvent != null) {
|
|
621
|
+
// Handle split events from $changeStreamSplitLargeEvent.
|
|
622
|
+
// This is only relevant for very large update operations.
|
|
623
|
+
const splitEvent = originalChangeDocument?.splitEvent;
|
|
992
624
|
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
throw new ChangeStreamInvalidatedError(
|
|
1011
|
-
'Internal collections have been dropped',
|
|
1012
|
-
new Error('_checkpoints collection was dropped')
|
|
1013
|
-
);
|
|
625
|
+
if (splitDocument == null) {
|
|
626
|
+
splitDocument = originalChangeDocument;
|
|
627
|
+
} else {
|
|
628
|
+
splitDocument = Object.assign(splitDocument, originalChangeDocument);
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
if (splitEvent.fragment == splitEvent.of) {
|
|
632
|
+
// Got all fragments
|
|
633
|
+
changeDocument = splitDocument;
|
|
634
|
+
splitDocument = null;
|
|
635
|
+
} else {
|
|
636
|
+
// Wait for more fragments
|
|
637
|
+
continue;
|
|
638
|
+
}
|
|
639
|
+
} else if (splitDocument != null) {
|
|
640
|
+
// We were waiting for fragments, but got a different event
|
|
641
|
+
throw new ReplicationAssertionError(`Incomplete splitEvent: ${JSON.stringify(splitDocument.splitEvent)}`);
|
|
1014
642
|
}
|
|
1015
643
|
|
|
1016
644
|
if (
|
|
1017
|
-
!
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
)
|
|
645
|
+
!filters.multipleDatabases &&
|
|
646
|
+
'ns' in changeDocument &&
|
|
647
|
+
changeDocument.ns.db != this.defaultDb.databaseName &&
|
|
648
|
+
changeDocument.ns.db.endsWith(`_${this.defaultDb.databaseName}`)
|
|
1022
649
|
) {
|
|
1023
|
-
|
|
650
|
+
// When all of the following conditions are met:
|
|
651
|
+
// 1. We're replicating from an Atlas Flex instance.
|
|
652
|
+
// 2. There were changestream events recorded while the PowerSync service is paused.
|
|
653
|
+
// 3. We're only replicating from a single database.
|
|
654
|
+
// Then we've observed an ns with for example {db: '67b83e86cd20730f1e766dde_ps'},
|
|
655
|
+
// instead of the expected {db: 'ps'}.
|
|
656
|
+
// We correct this.
|
|
657
|
+
changeDocument.ns.db = this.defaultDb.databaseName;
|
|
658
|
+
|
|
659
|
+
if (!flexDbNameWorkaroundLogged) {
|
|
660
|
+
flexDbNameWorkaroundLogged = true;
|
|
661
|
+
this.logger.warn(
|
|
662
|
+
`Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.`
|
|
663
|
+
);
|
|
664
|
+
}
|
|
1024
665
|
}
|
|
1025
666
|
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
667
|
+
const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
|
|
668
|
+
|
|
669
|
+
if (ns?.coll == CHECKPOINTS_COLLECTION) {
|
|
670
|
+
/**
|
|
671
|
+
* Dropping the database does not provide an `invalidate` event.
|
|
672
|
+
* We typically would receive `drop` events for the collection which we
|
|
673
|
+
* would process below.
|
|
674
|
+
*
|
|
675
|
+
* However we don't commit the LSN after collections are dropped.
|
|
676
|
+
* This prevents the `startAfter` or `resumeToken` from advancing past the drop events.
|
|
677
|
+
* The stream also closes after the drop events.
|
|
678
|
+
* This causes an infinite loop of processing the collection drop events.
|
|
679
|
+
*
|
|
680
|
+
* This check here invalidates the change stream if our `_powersync_checkpoints` collection
|
|
681
|
+
* is dropped. This allows for detecting when the DB is dropped.
|
|
682
|
+
*/
|
|
683
|
+
if (changeDocument.operationType == 'drop') {
|
|
684
|
+
throw new ChangeStreamInvalidatedError(
|
|
685
|
+
'Internal collections have been dropped',
|
|
686
|
+
new Error('_powersync_checkpoints collection was dropped')
|
|
687
|
+
);
|
|
688
|
+
}
|
|
689
|
+
|
|
690
|
+
if (
|
|
691
|
+
!(
|
|
692
|
+
changeDocument.operationType == 'insert' ||
|
|
693
|
+
changeDocument.operationType == 'update' ||
|
|
694
|
+
changeDocument.operationType == 'replace'
|
|
695
|
+
)
|
|
696
|
+
) {
|
|
697
|
+
continue;
|
|
698
|
+
}
|
|
699
|
+
|
|
700
|
+
// We handle two types of checkpoint events:
|
|
701
|
+
// 1. "Standalone" checkpoints, typically write checkpoints. We want to process these
|
|
702
|
+
// immediately, regardless of where they were created.
|
|
703
|
+
// 2. "Batch" checkpoints for the current stream. This is used as a form of dynamic rate
|
|
704
|
+
// limiting of commits, so we specifically want to exclude checkpoints from other streams.
|
|
705
|
+
//
|
|
706
|
+
// It may be useful to also throttle commits due to standalone checkpoints in the future.
|
|
707
|
+
// However, these typically have a much lower rate than batch checkpoints, so we don't do that for now.
|
|
708
|
+
|
|
709
|
+
const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId;
|
|
710
|
+
|
|
711
|
+
if (checkpointId == STANDALONE_CHECKPOINT_ID) {
|
|
712
|
+
// Standalone / write checkpoint received.
|
|
713
|
+
// When we are caught up, commit immediately to keep write checkpoint latency low.
|
|
714
|
+
// Once there is already a batch checkpoint pending, or the driver has buffered more
|
|
715
|
+
// change stream events, collapse standalone checkpoints into the normal batch
|
|
716
|
+
// checkpoint flow to avoid commit churn under sustained load.
|
|
717
|
+
const hasBufferedChanges = eventIndex < events.length - 1;
|
|
718
|
+
if (hasBufferedChanges && waitForCheckpointLsn == null) {
|
|
719
|
+
// Buffered changes - create a new batch checkpoint to rate limit commits
|
|
720
|
+
using _ = tracer.span('source_checkpoint');
|
|
1045
721
|
waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
|
|
722
|
+
continue;
|
|
723
|
+
} else if (waitForCheckpointLsn != null) {
|
|
724
|
+
// Skip this checkpoint - wait for the batch checkpoint.
|
|
725
|
+
continue;
|
|
726
|
+
} else {
|
|
727
|
+
// No buffered changes, and no batch checkpoint pending - commit immediately.
|
|
1046
728
|
}
|
|
729
|
+
} else if (!this.checkpointStreamId.equals(checkpointId)) {
|
|
1047
730
|
continue;
|
|
1048
731
|
}
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
`Change resumeToken ${(changeDocument._id as any)._data} (${timestampToDate(changeDocument.clusterTime!).toISOString()}) is less than last checkpoint LSN ${batch.lastCheckpointLsn}. Restarting replication.`
|
|
1064
|
-
);
|
|
1065
|
-
}
|
|
1066
|
-
|
|
1067
|
-
if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) {
|
|
1068
|
-
waitForCheckpointLsn = null;
|
|
1069
|
-
}
|
|
1070
|
-
const { checkpointBlocked } = await batch.commit(lsn, {
|
|
1071
|
-
oldestUncommittedChange: this.replicationLag.oldestUncommittedChange
|
|
1072
|
-
});
|
|
732
|
+
const { comparable: lsn } = new MongoLSN({
|
|
733
|
+
timestamp: changeDocument.clusterTime!,
|
|
734
|
+
resume_token: changeDocument._id
|
|
735
|
+
});
|
|
736
|
+
if (batch.lastCheckpointLsn != null && lsn < batch.lastCheckpointLsn) {
|
|
737
|
+
// Checkpoint out of order - should never happen with MongoDB.
|
|
738
|
+
// If it does happen, we throw an error to stop the replication - restarting should recover.
|
|
739
|
+
// Since we use batch.lastCheckpointLsn for the next resumeAfter, this should not result in an infinite loop.
|
|
740
|
+
// Originally a workaround for https://jira.mongodb.org/browse/NODE-7042.
|
|
741
|
+
// This has been fixed in the driver in the meantime, but we still keep this as a safety-check.
|
|
742
|
+
throw new ReplicationAssertionError(
|
|
743
|
+
`Change resumeToken ${(changeDocument._id as any)._data} (${timestampToDate(changeDocument.clusterTime!).toISOString()}) is less than last checkpoint LSN ${batch.lastCheckpointLsn}. Restarting replication.`
|
|
744
|
+
);
|
|
745
|
+
}
|
|
1073
746
|
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
changeDocument.operationType == 'update' ||
|
|
1081
|
-
changeDocument.operationType == 'replace' ||
|
|
1082
|
-
changeDocument.operationType == 'delete'
|
|
1083
|
-
) {
|
|
1084
|
-
if (waitForCheckpointLsn == null) {
|
|
1085
|
-
waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
|
|
1086
|
-
}
|
|
747
|
+
if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) {
|
|
748
|
+
waitForCheckpointLsn = null;
|
|
749
|
+
}
|
|
750
|
+
const { checkpointBlocked } = await batch.commit(lsn, {
|
|
751
|
+
oldestUncommittedChange: this.replicationLag.oldestUncommittedChange
|
|
752
|
+
});
|
|
1087
753
|
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
754
|
+
if (!checkpointBlocked) {
|
|
755
|
+
this.replicationLag.markCommitted();
|
|
756
|
+
}
|
|
757
|
+
} else if (
|
|
758
|
+
changeDocument.operationType == 'insert' ||
|
|
759
|
+
changeDocument.operationType == 'update' ||
|
|
760
|
+
changeDocument.operationType == 'replace' ||
|
|
761
|
+
changeDocument.operationType == 'delete'
|
|
762
|
+
) {
|
|
763
|
+
if (waitForCheckpointLsn == null) {
|
|
764
|
+
using _ = tracer.span('source_checkpoint');
|
|
765
|
+
waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
|
|
766
|
+
}
|
|
1100
767
|
|
|
1101
|
-
const
|
|
768
|
+
const rel = getMongoRelation(changeDocument.ns, this.connections.connectionTag);
|
|
769
|
+
const tables = await this.getRelations(batch, rel, {
|
|
770
|
+
// In most cases, we should not need to snapshot this. But if this is the first time we see the collection
|
|
771
|
+
// for whatever reason, then we do need to snapshot it.
|
|
772
|
+
// This may result in some duplicate operations when a collection is created for the first time after
|
|
773
|
+
// sync config was deployed.
|
|
774
|
+
snapshot: true
|
|
775
|
+
});
|
|
776
|
+
const tablesToReplicate = tables.filter((table) => table.syncAny);
|
|
777
|
+
if (tablesToReplicate.length > 0) {
|
|
778
|
+
this.replicationLag.trackUncommittedChange(
|
|
779
|
+
changeDocument.clusterTime == null ? null : timestampToDate(changeDocument.clusterTime)
|
|
780
|
+
);
|
|
781
|
+
|
|
782
|
+
const transactionKeyValue = transactionKey(changeDocument);
|
|
783
|
+
|
|
784
|
+
if (transactionKeyValue == null || lastTxnKey != transactionKeyValue) {
|
|
785
|
+
// Very crude metric for counting transactions replicated.
|
|
786
|
+
// We ignore operations other than basic CRUD, and ignore changes to _powersync_checkpoints.
|
|
787
|
+
// Individual writes may not have a txnNumber, in which case we count them as separate transactions.
|
|
788
|
+
lastTxnKey = transactionKeyValue;
|
|
789
|
+
transactionsReplicatedMetric.add(1);
|
|
790
|
+
}
|
|
1102
791
|
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
// Individual writes may not have a txnNumber, in which case we count them as separate transactions.
|
|
1107
|
-
lastTxnKey = transactionKeyValue;
|
|
1108
|
-
transactionsReplicatedMetric.add(1);
|
|
792
|
+
for (const table of tablesToReplicate) {
|
|
793
|
+
await this.writeChange(batch, table, changeDocument);
|
|
794
|
+
}
|
|
1109
795
|
}
|
|
1110
|
-
|
|
1111
|
-
const
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
796
|
+
} else if (changeDocument.operationType == 'drop') {
|
|
797
|
+
const rel = getMongoRelation(changeDocument.ns, this.connections.connectionTag);
|
|
798
|
+
const tables = await this.getRelations(batch, rel, {
|
|
799
|
+
// We're "dropping" this collection, so never snapshot it.
|
|
800
|
+
snapshot: false
|
|
801
|
+
});
|
|
802
|
+
const tablesToDrop = tables.filter((table) => table.syncAny);
|
|
803
|
+
if (tablesToDrop.length > 0) {
|
|
804
|
+
await batch.drop(tablesToDrop);
|
|
805
|
+
}
|
|
806
|
+
this.relationCache.delete(rel);
|
|
807
|
+
} else if (changeDocument.operationType == 'rename') {
|
|
808
|
+
const relFrom = getMongoRelation(changeDocument.ns, this.connections.connectionTag);
|
|
809
|
+
const relTo = getMongoRelation(changeDocument.to, this.connections.connectionTag);
|
|
810
|
+
const tablesFrom = await this.getRelations(batch, relFrom, {
|
|
811
|
+
// We're "dropping" this collection, so never snapshot it.
|
|
812
|
+
snapshot: false
|
|
813
|
+
});
|
|
814
|
+
const tablesToDrop = tablesFrom.filter((table) => table.syncAny);
|
|
815
|
+
if (tablesToDrop.length > 0) {
|
|
816
|
+
await batch.drop(tablesToDrop);
|
|
1125
817
|
}
|
|
1126
|
-
}
|
|
1127
|
-
} else if (changeDocument.operationType == 'drop') {
|
|
1128
|
-
const rel = getMongoRelation(changeDocument.ns);
|
|
1129
|
-
const table = await this.getRelation(batch, rel, {
|
|
1130
|
-
// We're "dropping" this collection, so never snapshot it.
|
|
1131
|
-
snapshot: false
|
|
1132
|
-
});
|
|
1133
|
-
if (table.syncAny) {
|
|
1134
|
-
await batch.drop([table]);
|
|
1135
|
-
this.relationCache.delete(table);
|
|
1136
|
-
}
|
|
1137
|
-
} else if (changeDocument.operationType == 'rename') {
|
|
1138
|
-
const relFrom = getMongoRelation(changeDocument.ns);
|
|
1139
|
-
const relTo = getMongoRelation(changeDocument.to);
|
|
1140
|
-
const tableFrom = await this.getRelation(batch, relFrom, {
|
|
1141
|
-
// We're "dropping" this collection, so never snapshot it.
|
|
1142
|
-
snapshot: false
|
|
1143
|
-
});
|
|
1144
|
-
if (tableFrom.syncAny) {
|
|
1145
|
-
await batch.drop([tableFrom]);
|
|
1146
818
|
this.relationCache.delete(relFrom);
|
|
819
|
+
// Here we do need to snapshot the new table
|
|
820
|
+
const collection = await this.getCollectionInfo(relTo.schema, relTo.name);
|
|
821
|
+
await this.handleRelation(batch, relTo, {
|
|
822
|
+
// This is a new (renamed) collection, so always snapshot it.
|
|
823
|
+
snapshot: true,
|
|
824
|
+
collectionInfo: collection
|
|
825
|
+
});
|
|
1147
826
|
}
|
|
1148
|
-
// Here we do need to snapshot the new table
|
|
1149
|
-
const collection = await this.getCollectionInfo(relTo.schema, relTo.name);
|
|
1150
|
-
await this.handleRelation(batch, relTo, {
|
|
1151
|
-
// This is a new (renamed) collection, so always snapshot it.
|
|
1152
|
-
snapshot: true,
|
|
1153
|
-
collectionInfo: collection
|
|
1154
|
-
});
|
|
1155
827
|
}
|
|
828
|
+
|
|
829
|
+
if (splitDocument == null) {
|
|
830
|
+
// We flush and mark progress on every batch of data we receive.
|
|
831
|
+
// Batches are generally large (64MB or 6000 events, whichever comes first),
|
|
832
|
+
// so this is a good natural point to flush and mark progress.
|
|
833
|
+
// We avoid this when splitDocument is set, since we cannot resume in the middle of a split event.
|
|
834
|
+
const { comparable: lsn } = MongoLSN.fromResumeToken(resumeToken);
|
|
835
|
+
await batch.flush({ oldestUncommittedChange: this.replicationLag.oldestUncommittedChange });
|
|
836
|
+
// TODO: We should consider making this standard behavior of flush().
|
|
837
|
+
await batch.setResumeLsn(lsn);
|
|
838
|
+
}
|
|
839
|
+
|
|
840
|
+
batchSpan.end();
|
|
841
|
+
const durationsMicroseconds = outerSpan.end();
|
|
842
|
+
const duration = batchSpan.durationMillis;
|
|
843
|
+
|
|
844
|
+
this.logger.info(
|
|
845
|
+
`Processed batch of ${events.length} changes / ${eventBatch.byteSize} bytes in ${duration}ms`,
|
|
846
|
+
{
|
|
847
|
+
count: events.length,
|
|
848
|
+
bytes: eventBatch.byteSize,
|
|
849
|
+
duration,
|
|
850
|
+
t: durationsMicroseconds
|
|
851
|
+
}
|
|
852
|
+
);
|
|
853
|
+
outerSpan = tracer.span('batch');
|
|
1156
854
|
}
|
|
1157
855
|
}
|
|
1158
856
|
);
|
|
857
|
+
|
|
858
|
+
throw new ReplicationAbortedError(`Replication stream aborted`, this.abortSignal.reason);
|
|
1159
859
|
}
|
|
1160
860
|
|
|
1161
861
|
getReplicationLagMillis(): number | undefined {
|
|
@@ -1175,34 +875,35 @@ export class ChangeStream {
|
|
|
1175
875
|
}
|
|
1176
876
|
}
|
|
1177
877
|
|
|
1178
|
-
function mapChangeStreamError(e: any) {
|
|
1179
|
-
if (isMongoNetworkTimeoutError(e)) {
|
|
1180
|
-
// This typically has an unhelpful message like "connection 2 to 159.41.94.47:27017 timed out".
|
|
1181
|
-
// We wrap the error to make it more useful.
|
|
1182
|
-
throw new DatabaseConnectionError(ErrorCode.PSYNC_S1345, `Timeout while reading MongoDB ChangeStream`, e);
|
|
1183
|
-
} else if (isMongoServerError(e) && e.codeName == 'MaxTimeMSExpired') {
|
|
1184
|
-
// maxTimeMS was reached. Example message:
|
|
1185
|
-
// MongoServerError: Executor error during aggregate command on namespace: powersync_test_data.$cmd.aggregate :: caused by :: operation exceeded time limit
|
|
1186
|
-
throw new DatabaseConnectionError(ErrorCode.PSYNC_S1345, `Timeout while reading MongoDB ChangeStream`, e);
|
|
1187
|
-
} else if (
|
|
1188
|
-
isMongoServerError(e) &&
|
|
1189
|
-
e.codeName == 'NoMatchingDocument' &&
|
|
1190
|
-
e.errmsg?.includes('post-image was not found')
|
|
1191
|
-
) {
|
|
1192
|
-
throw new ChangeStreamInvalidatedError(e.errmsg, e);
|
|
1193
|
-
} else if (isMongoServerError(e) && e.hasErrorLabel('NonResumableChangeStreamError')) {
|
|
1194
|
-
throw new ChangeStreamInvalidatedError(e.message, e);
|
|
1195
|
-
} else {
|
|
1196
|
-
throw new DatabaseConnectionError(ErrorCode.PSYNC_S1346, `Error reading MongoDB ChangeStream`, e);
|
|
1197
|
-
}
|
|
1198
|
-
}
|
|
1199
|
-
|
|
1200
878
|
/**
|
|
1201
879
|
* Transaction key for a change stream event, used to detect transaction boundaries. Returns null if the event is not part of a transaction.
|
|
1202
880
|
*/
|
|
1203
|
-
function transactionKey(doc: mongo.ChangeStreamDocument): string | null {
|
|
881
|
+
function transactionKey(doc: Pick<mongo.ChangeStreamDocument, 'lsid' | 'txnNumber'>): string | null {
|
|
1204
882
|
if (doc.txnNumber == null || doc.lsid == null) {
|
|
1205
883
|
return null;
|
|
1206
884
|
}
|
|
1207
885
|
return `${doc.lsid.id.toString('hex')}:${doc.txnNumber}`;
|
|
1208
886
|
}
|
|
887
|
+
|
|
888
|
+
/**
|
|
889
|
+
* Prioritize errors that are _not_ ReplicationAbortedError. Any error on either loopPromise or
|
|
890
|
+
* streamPromise aborts the other one, which then results in a ReplicationAbortedError, hiding the
|
|
891
|
+
* original cause.
|
|
892
|
+
*/
|
|
893
|
+
function replicationLoopError(results: PromiseSettledResult<any>[]): unknown {
|
|
894
|
+
// 1. Prioritize not ReplicationAbortedError.
|
|
895
|
+
for (const result of results) {
|
|
896
|
+
if (result.status == 'rejected' && !(result.reason instanceof ReplicationAbortedError)) {
|
|
897
|
+
return result.reason;
|
|
898
|
+
}
|
|
899
|
+
}
|
|
900
|
+
// 2. Fallback to ReplicationAbortedError.
|
|
901
|
+
for (const result of results) {
|
|
902
|
+
if (result.status == 'rejected') {
|
|
903
|
+
// At this point only ReplicationAbortedError remains
|
|
904
|
+
return result.reason;
|
|
905
|
+
}
|
|
906
|
+
}
|
|
907
|
+
// 3. Should never happen, but we cover this case.
|
|
908
|
+
return new ReplicationAssertionError(`Replication loop exited unexpectedly`);
|
|
909
|
+
}
|