@powersync/service-module-mongodb 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +33 -0
- package/dist/api/MongoRouteAPIAdapter.js +12 -21
- package/dist/api/MongoRouteAPIAdapter.js.map +1 -1
- package/dist/replication/ChangeStream.d.ts +18 -37
- package/dist/replication/ChangeStream.js +136 -351
- package/dist/replication/ChangeStream.js.map +1 -1
- package/dist/replication/MongoRelation.d.ts +1 -1
- package/dist/replication/MongoRelation.js +41 -21
- package/dist/replication/MongoRelation.js.map +1 -1
- package/dist/replication/MongoSnapshotter.d.ts +81 -0
- package/dist/replication/MongoSnapshotter.js +594 -0
- package/dist/replication/MongoSnapshotter.js.map +1 -0
- package/package.json +8 -8
- package/src/api/MongoRouteAPIAdapter.ts +13 -21
- package/src/replication/ChangeStream.ts +150 -426
- package/src/replication/MongoRelation.ts +51 -25
- package/src/replication/MongoSnapshotter.ts +729 -0
- package/test/src/change_stream.test.ts +210 -17
- package/test/src/change_stream_utils.ts +24 -17
- package/test/src/checkpoint_retry.test.ts +131 -0
- package/test/src/resuming_snapshots.test.ts +10 -6
- package/tsconfig.tsbuildinfo +1 -1
|
@@ -18,7 +18,7 @@ import {
|
|
|
18
18
|
SourceTable,
|
|
19
19
|
storage
|
|
20
20
|
} from '@powersync/service-core';
|
|
21
|
-
import {
|
|
21
|
+
import { HydratedSyncConfig } from '@powersync/service-sync-rules';
|
|
22
22
|
import { ReplicationMetric } from '@powersync/service-types';
|
|
23
23
|
import { performance } from 'node:perf_hooks';
|
|
24
24
|
import { MongoLSN } from '../common/MongoLSN.js';
|
|
@@ -26,7 +26,7 @@ import { PostImagesOption } from '../types/types.js';
|
|
|
26
26
|
import { escapeRegExp } from '../utils.js';
|
|
27
27
|
import { MongoManager } from './MongoManager.js';
|
|
28
28
|
import { createCheckpoint, getCacheIdentifier, getMongoRelation, STANDALONE_CHECKPOINT_ID } from './MongoRelation.js';
|
|
29
|
-
import {
|
|
29
|
+
import { MongoSnapshotter, MongoSnapshotterHooks } from './MongoSnapshotter.js';
|
|
30
30
|
import {
|
|
31
31
|
ChangeStreamBatch,
|
|
32
32
|
parseChangeDocument,
|
|
@@ -53,12 +53,10 @@ export interface ChangeStreamOptions {
|
|
|
53
53
|
*/
|
|
54
54
|
snapshotChunkLength?: number;
|
|
55
55
|
|
|
56
|
-
|
|
57
|
-
|
|
56
|
+
storageHooks?: storage.StorageHooks;
|
|
57
|
+
snapshotHooks?: MongoSnapshotterHooks;
|
|
58
58
|
|
|
59
|
-
|
|
60
|
-
needsInitialSync: boolean;
|
|
61
|
-
snapshotLsn: string | null;
|
|
59
|
+
logger?: Logger;
|
|
62
60
|
}
|
|
63
61
|
|
|
64
62
|
/**
|
|
@@ -76,7 +74,7 @@ export class ChangeStreamInvalidatedError extends DatabaseConnectionError {
|
|
|
76
74
|
}
|
|
77
75
|
|
|
78
76
|
export class ChangeStream {
|
|
79
|
-
sync_rules:
|
|
77
|
+
sync_rules: HydratedSyncConfig;
|
|
80
78
|
group_id: number;
|
|
81
79
|
|
|
82
80
|
connection_id = 1;
|
|
@@ -90,8 +88,15 @@ export class ChangeStream {
|
|
|
90
88
|
|
|
91
89
|
private readonly maxAwaitTimeMS: number;
|
|
92
90
|
|
|
93
|
-
private
|
|
91
|
+
private abortController = new AbortController();
|
|
92
|
+
private abortSignal: AbortSignal = this.abortController.signal;
|
|
94
93
|
|
|
94
|
+
private initPromise: Promise<void> | null = null;
|
|
95
|
+
private snapshotter: MongoSnapshotter;
|
|
96
|
+
|
|
97
|
+
/**
|
|
98
|
+
* We use the relationCache _only_ for caching static SourceTable info, not for snapshot status.
|
|
99
|
+
*/
|
|
95
100
|
private relationCache = new RelationCache(getCacheIdentifier);
|
|
96
101
|
|
|
97
102
|
private replicationLag = new ReplicationLagTracker();
|
|
@@ -104,6 +109,8 @@ export class ChangeStream {
|
|
|
104
109
|
|
|
105
110
|
private changeStreamTimeout: number;
|
|
106
111
|
|
|
112
|
+
private storageHooks: storage.StorageHooks | undefined;
|
|
113
|
+
|
|
107
114
|
private readonly sourceRowConverter: SourceRowConverter;
|
|
108
115
|
|
|
109
116
|
constructor(options: ChangeStreamOptions) {
|
|
@@ -113,6 +120,7 @@ export class ChangeStream {
|
|
|
113
120
|
this.connections = options.connections;
|
|
114
121
|
this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 10_000;
|
|
115
122
|
this.snapshotChunkLength = options.snapshotChunkLength ?? 6_000;
|
|
123
|
+
this.storageHooks = options.storageHooks;
|
|
116
124
|
this.client = this.connections.client;
|
|
117
125
|
this.defaultDb = this.connections.db;
|
|
118
126
|
this.sync_rules = options.storage.getParsedSyncRules({
|
|
@@ -124,20 +132,28 @@ export class ChangeStream {
|
|
|
124
132
|
// so we use 90% of the socket timeout value.
|
|
125
133
|
this.changeStreamTimeout = Math.ceil(this.client.options.socketTimeoutMS * 0.9);
|
|
126
134
|
|
|
127
|
-
this.
|
|
128
|
-
this.
|
|
135
|
+
this.logger = options.logger ?? this.storage.logger;
|
|
136
|
+
this.snapshotter = new MongoSnapshotter({
|
|
137
|
+
...options,
|
|
138
|
+
abortSignal: this.abortSignal,
|
|
139
|
+
logger: this.logger,
|
|
140
|
+
checkpointStreamId: this.checkpointStreamId
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
options.abort_signal.addEventListener(
|
|
129
144
|
'abort',
|
|
130
145
|
() => {
|
|
131
|
-
|
|
146
|
+
this.abortController.abort(options.abort_signal.reason);
|
|
132
147
|
},
|
|
133
148
|
{ once: true }
|
|
134
149
|
);
|
|
135
|
-
|
|
136
|
-
|
|
150
|
+
if (options.abort_signal.aborted) {
|
|
151
|
+
this.abortController.abort(options.abort_signal.reason);
|
|
152
|
+
}
|
|
137
153
|
}
|
|
138
154
|
|
|
139
155
|
get stopped() {
|
|
140
|
-
return this.
|
|
156
|
+
return this.abortSignal.aborted;
|
|
141
157
|
}
|
|
142
158
|
|
|
143
159
|
private get usePostImages() {
|
|
@@ -148,279 +164,6 @@ export class ChangeStream {
|
|
|
148
164
|
return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE;
|
|
149
165
|
}
|
|
150
166
|
|
|
151
|
-
/**
|
|
152
|
-
* This resolves a pattern, persists the related metadata, and returns
|
|
153
|
-
* the resulting SourceTables.
|
|
154
|
-
*
|
|
155
|
-
* This implicitly checks the collection postImage configuration.
|
|
156
|
-
*/
|
|
157
|
-
async resolveQualifiedTableNames(
|
|
158
|
-
batch: storage.BucketStorageBatch,
|
|
159
|
-
tablePattern: TablePattern
|
|
160
|
-
): Promise<storage.SourceTable[]> {
|
|
161
|
-
const schema = tablePattern.schema;
|
|
162
|
-
if (tablePattern.connectionTag != this.connections.connectionTag) {
|
|
163
|
-
return [];
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
let nameFilter: RegExp | string;
|
|
167
|
-
if (tablePattern.isWildcard) {
|
|
168
|
-
nameFilter = new RegExp('^' + escapeRegExp(tablePattern.tablePrefix));
|
|
169
|
-
} else {
|
|
170
|
-
nameFilter = tablePattern.name;
|
|
171
|
-
}
|
|
172
|
-
let result: storage.SourceTable[] = [];
|
|
173
|
-
|
|
174
|
-
// Check if the collection exists
|
|
175
|
-
const collections = await this.client
|
|
176
|
-
.db(schema)
|
|
177
|
-
.listCollections(
|
|
178
|
-
{
|
|
179
|
-
name: nameFilter
|
|
180
|
-
},
|
|
181
|
-
{ nameOnly: false }
|
|
182
|
-
)
|
|
183
|
-
.toArray();
|
|
184
|
-
|
|
185
|
-
if (!tablePattern.isWildcard && collections.length == 0) {
|
|
186
|
-
this.logger.warn(`Collection ${schema}.${tablePattern.name} not found`);
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
for (let collection of collections) {
|
|
190
|
-
const table = await this.handleRelation(
|
|
191
|
-
batch,
|
|
192
|
-
getMongoRelation({ db: schema, coll: collection.name }),
|
|
193
|
-
// This is done as part of the initial setup - snapshot is handled elsewhere
|
|
194
|
-
{ snapshot: false, collectionInfo: collection }
|
|
195
|
-
);
|
|
196
|
-
|
|
197
|
-
result.push(table);
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
return result;
|
|
201
|
-
}
|
|
202
|
-
|
|
203
|
-
async initSlot(): Promise<InitResult> {
|
|
204
|
-
const status = await this.storage.getStatus();
|
|
205
|
-
if (status.snapshot_done && status.checkpoint_lsn) {
|
|
206
|
-
this.logger.info(`Initial replication already done`);
|
|
207
|
-
return { needsInitialSync: false, snapshotLsn: null };
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
return { needsInitialSync: true, snapshotLsn: status.snapshot_lsn };
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
async estimatedCount(table: storage.SourceTable): Promise<string> {
|
|
214
|
-
const count = await this.estimatedCountNumber(table);
|
|
215
|
-
return `~${count}`;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
async estimatedCountNumber(table: storage.SourceTable): Promise<number> {
|
|
219
|
-
const db = this.client.db(table.schema);
|
|
220
|
-
return await db.collection(table.name).estimatedDocumentCount();
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
/**
|
|
224
|
-
* This gets a LSN before starting a snapshot, which we can resume streaming from after the snapshot.
|
|
225
|
-
*
|
|
226
|
-
* This LSN can survive initial replication restarts.
|
|
227
|
-
*/
|
|
228
|
-
private async getSnapshotLsn(): Promise<string> {
|
|
229
|
-
const hello = await this.defaultDb.command({ hello: 1 });
|
|
230
|
-
// Basic sanity check
|
|
231
|
-
if (hello.msg == 'isdbgrid') {
|
|
232
|
-
throw new ServiceError(
|
|
233
|
-
ErrorCode.PSYNC_S1341,
|
|
234
|
-
'Sharded MongoDB Clusters are not supported yet (including MongoDB Serverless instances).'
|
|
235
|
-
);
|
|
236
|
-
} else if (hello.setName == null) {
|
|
237
|
-
throw new ServiceError(
|
|
238
|
-
ErrorCode.PSYNC_S1342,
|
|
239
|
-
'Standalone MongoDB instances are not supported - use a replicaset.'
|
|
240
|
-
);
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
// Open a change stream just to get a resume token for later use.
|
|
244
|
-
// We could use clusterTime from the hello command, but that won't tell us if the
|
|
245
|
-
// snapshot isn't valid anymore.
|
|
246
|
-
// If we just use the first resumeToken from the stream, we get two potential issues:
|
|
247
|
-
// 1. The resumeToken may just be a wrapped clusterTime, which does not detect changes
|
|
248
|
-
// in source db or other stream issues.
|
|
249
|
-
// 2. The first actual change we get may have the same clusterTime, causing us to incorrect
|
|
250
|
-
// skip that event.
|
|
251
|
-
// Instead, we create a new checkpoint document, and wait until we get that document back in the stream.
|
|
252
|
-
// To avoid potential race conditions with the checkpoint creation, we create a new checkpoint document
|
|
253
|
-
// periodically until the timeout is reached.
|
|
254
|
-
|
|
255
|
-
const LSN_TIMEOUT_SECONDS = 60;
|
|
256
|
-
const LSN_CREATE_INTERVAL_SECONDS = 1;
|
|
257
|
-
|
|
258
|
-
// Create a checkpoint, and open a change stream using startAtOperationTime with the checkpoint's operationTime.
|
|
259
|
-
const firstCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
|
|
260
|
-
|
|
261
|
-
const startTime = performance.now();
|
|
262
|
-
let lastCheckpointCreated = performance.now();
|
|
263
|
-
let eventsSeen = 0;
|
|
264
|
-
let batchesSeen = 0;
|
|
265
|
-
|
|
266
|
-
const filters = this.getSourceNamespaceFilters();
|
|
267
|
-
const iter = this.rawChangeStreamBatches({
|
|
268
|
-
lsn: firstCheckpointLsn,
|
|
269
|
-
maxAwaitTimeMS: 0,
|
|
270
|
-
signal: this.abort_signal,
|
|
271
|
-
filters
|
|
272
|
-
});
|
|
273
|
-
for await (let { events } of iter) {
|
|
274
|
-
if (performance.now() - startTime >= LSN_TIMEOUT_SECONDS * 1000) {
|
|
275
|
-
break;
|
|
276
|
-
}
|
|
277
|
-
if (performance.now() - lastCheckpointCreated >= LSN_CREATE_INTERVAL_SECONDS * 1000) {
|
|
278
|
-
await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
|
|
279
|
-
lastCheckpointCreated = performance.now();
|
|
280
|
-
}
|
|
281
|
-
batchesSeen += 1;
|
|
282
|
-
|
|
283
|
-
for (let rawChangeDocument of events) {
|
|
284
|
-
const changeDocument = parseChangeDocument(rawChangeDocument);
|
|
285
|
-
const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
|
|
286
|
-
|
|
287
|
-
if (ns?.coll == CHECKPOINTS_COLLECTION && 'documentKey' in changeDocument) {
|
|
288
|
-
const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId;
|
|
289
|
-
if (!this.checkpointStreamId.equals(checkpointId)) {
|
|
290
|
-
continue;
|
|
291
|
-
}
|
|
292
|
-
const { comparable: lsn } = new MongoLSN({
|
|
293
|
-
timestamp: changeDocument.clusterTime!,
|
|
294
|
-
resume_token: changeDocument._id
|
|
295
|
-
});
|
|
296
|
-
return lsn;
|
|
297
|
-
}
|
|
298
|
-
|
|
299
|
-
eventsSeen += 1;
|
|
300
|
-
}
|
|
301
|
-
}
|
|
302
|
-
|
|
303
|
-
// Could happen if there is a very large replication lag?
|
|
304
|
-
throw new ServiceError(
|
|
305
|
-
ErrorCode.PSYNC_S1301,
|
|
306
|
-
`Timeout after while waiting for checkpoint document for ${LSN_TIMEOUT_SECONDS}s. Streamed events = ${eventsSeen}, batches = ${batchesSeen}`
|
|
307
|
-
);
|
|
308
|
-
}
|
|
309
|
-
|
|
310
|
-
/**
|
|
311
|
-
* Given a snapshot LSN, validate that we can read from it, by opening a change stream.
|
|
312
|
-
*/
|
|
313
|
-
private async validateSnapshotLsn(lsn: string) {
|
|
314
|
-
const filters = this.getSourceNamespaceFilters();
|
|
315
|
-
const stream = this.rawChangeStreamBatches({
|
|
316
|
-
lsn: lsn,
|
|
317
|
-
// maxAwaitTimeMS should never actually be used here
|
|
318
|
-
maxAwaitTimeMS: 0,
|
|
319
|
-
filters
|
|
320
|
-
});
|
|
321
|
-
for await (let _batch of stream) {
|
|
322
|
-
// We got a response from the aggregate command, so consider the LSN valid.
|
|
323
|
-
// Close the stream immediately.
|
|
324
|
-
break;
|
|
325
|
-
}
|
|
326
|
-
}
|
|
327
|
-
|
|
328
|
-
async initialReplication(snapshotLsn: string | null) {
|
|
329
|
-
const sourceTables = this.sync_rules.getSourceTables();
|
|
330
|
-
await this.client.connect();
|
|
331
|
-
const tracer = new PerformanceTracer('MongoDB initial replication');
|
|
332
|
-
|
|
333
|
-
const flushResult = await this.storage.startBatch(
|
|
334
|
-
{
|
|
335
|
-
logger: this.logger,
|
|
336
|
-
zeroLSN: MongoLSN.ZERO.comparable,
|
|
337
|
-
defaultSchema: this.defaultDb.databaseName,
|
|
338
|
-
storeCurrentData: false,
|
|
339
|
-
skipExistingRows: true,
|
|
340
|
-
tracer
|
|
341
|
-
},
|
|
342
|
-
async (batch) => {
|
|
343
|
-
if (snapshotLsn == null) {
|
|
344
|
-
// First replication attempt - get a snapshot and store the timestamp
|
|
345
|
-
snapshotLsn = await this.getSnapshotLsn();
|
|
346
|
-
await batch.setResumeLsn(snapshotLsn);
|
|
347
|
-
this.logger.info(`Marking snapshot at ${snapshotLsn}`);
|
|
348
|
-
} else {
|
|
349
|
-
this.logger.info(`Resuming snapshot at ${snapshotLsn}`);
|
|
350
|
-
// Check that the snapshot is still valid.
|
|
351
|
-
await this.validateSnapshotLsn(snapshotLsn);
|
|
352
|
-
}
|
|
353
|
-
|
|
354
|
-
// Start by resolving all tables.
|
|
355
|
-
// This checks postImage configuration, and that should fail as
|
|
356
|
-
// early as possible.
|
|
357
|
-
let allSourceTables: SourceTable[] = [];
|
|
358
|
-
for (let tablePattern of sourceTables) {
|
|
359
|
-
const tables = await this.resolveQualifiedTableNames(batch, tablePattern);
|
|
360
|
-
allSourceTables.push(...tables);
|
|
361
|
-
}
|
|
362
|
-
|
|
363
|
-
let tablesWithStatus: SourceTable[] = [];
|
|
364
|
-
for (let table of allSourceTables) {
|
|
365
|
-
if (table.snapshotComplete) {
|
|
366
|
-
this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`);
|
|
367
|
-
continue;
|
|
368
|
-
}
|
|
369
|
-
let count = await this.estimatedCountNumber(table);
|
|
370
|
-
const updated = await batch.updateTableProgress(table, {
|
|
371
|
-
totalEstimatedCount: count
|
|
372
|
-
});
|
|
373
|
-
tablesWithStatus.push(updated);
|
|
374
|
-
this.relationCache.update(updated);
|
|
375
|
-
this.logger.info(
|
|
376
|
-
`To replicate: ${table.qualifiedName}: ${updated.snapshotStatus?.replicatedCount}/~${updated.snapshotStatus?.totalEstimatedCount}`
|
|
377
|
-
);
|
|
378
|
-
}
|
|
379
|
-
|
|
380
|
-
for (let table of tablesWithStatus) {
|
|
381
|
-
await this.snapshotTable(batch, table);
|
|
382
|
-
await batch.markTableSnapshotDone([table]);
|
|
383
|
-
|
|
384
|
-
this.touch();
|
|
385
|
-
}
|
|
386
|
-
|
|
387
|
-
// The checkpoint here is a marker - we need to replicate up to at least this
|
|
388
|
-
// point before the data can be considered consistent.
|
|
389
|
-
// We could do this for each individual table, but may as well just do it once for the entire snapshot.
|
|
390
|
-
const checkpoint = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);
|
|
391
|
-
await batch.markAllSnapshotDone(checkpoint);
|
|
392
|
-
|
|
393
|
-
// This will not create a consistent checkpoint yet, but will persist the op.
|
|
394
|
-
// Actual checkpoint will be created when streaming replication caught up.
|
|
395
|
-
await batch.commit(snapshotLsn);
|
|
396
|
-
|
|
397
|
-
this.logger.info(`Snapshot done. Need to replicate from ${snapshotLsn} to ${checkpoint} to be consistent`);
|
|
398
|
-
}
|
|
399
|
-
);
|
|
400
|
-
return { lastOpId: flushResult?.flushed_op };
|
|
401
|
-
}
|
|
402
|
-
|
|
403
|
-
private async setupCheckpointsCollection() {
|
|
404
|
-
const collection = await this.getCollectionInfo(this.defaultDb.databaseName, CHECKPOINTS_COLLECTION);
|
|
405
|
-
if (collection == null) {
|
|
406
|
-
await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, {
|
|
407
|
-
changeStreamPreAndPostImages: { enabled: true }
|
|
408
|
-
});
|
|
409
|
-
} else if (this.usePostImages && collection.options?.changeStreamPreAndPostImages?.enabled != true) {
|
|
410
|
-
// Drop + create requires less permissions than collMod,
|
|
411
|
-
// and we don't care about the data in this collection.
|
|
412
|
-
await this.defaultDb.dropCollection(CHECKPOINTS_COLLECTION);
|
|
413
|
-
await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, {
|
|
414
|
-
changeStreamPreAndPostImages: { enabled: true }
|
|
415
|
-
});
|
|
416
|
-
} else {
|
|
417
|
-
// Clear the collection on startup, to keep it clean
|
|
418
|
-
// We never query this collection directly, and don't want to keep the data around.
|
|
419
|
-
// We only use this to get data into the oplog/changestream.
|
|
420
|
-
await this.defaultDb.collection(CHECKPOINTS_COLLECTION).deleteMany({});
|
|
421
|
-
}
|
|
422
|
-
}
|
|
423
|
-
|
|
424
167
|
private getSourceNamespaceFilters(): { $match: any; multipleDatabases: boolean } {
|
|
425
168
|
const sourceTables = this.sync_rules.getSourceTables();
|
|
426
169
|
|
|
@@ -472,89 +215,14 @@ export class ChangeStream {
|
|
|
472
215
|
return { $match: nsFilter, multipleDatabases };
|
|
473
216
|
}
|
|
474
217
|
|
|
475
|
-
private async
|
|
476
|
-
const rowsReplicatedMetric = this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED);
|
|
477
|
-
const bytesReplicatedMetric = this.metrics.getCounter(ReplicationMetric.DATA_REPLICATED_BYTES);
|
|
478
|
-
const chunksReplicatedMetric = this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED);
|
|
479
|
-
|
|
480
|
-
const totalEstimatedCount = await this.estimatedCountNumber(table);
|
|
481
|
-
let at = table.snapshotStatus?.replicatedCount ?? 0;
|
|
482
|
-
const db = this.client.db(table.schema);
|
|
483
|
-
const collection = db.collection(table.name);
|
|
484
|
-
await using query = new ChunkedSnapshotQuery({
|
|
485
|
-
collection,
|
|
486
|
-
key: table.snapshotStatus?.lastKey,
|
|
487
|
-
batchSize: this.snapshotChunkLength
|
|
488
|
-
});
|
|
489
|
-
if (query.lastKey != null) {
|
|
490
|
-
this.logger.info(
|
|
491
|
-
`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming at _id > ${query.lastKey}`
|
|
492
|
-
);
|
|
493
|
-
} else {
|
|
494
|
-
this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
|
|
495
|
-
}
|
|
496
|
-
|
|
497
|
-
let lastBatch = performance.now();
|
|
498
|
-
let nextChunkPromise = query.nextChunk();
|
|
499
|
-
while (true) {
|
|
500
|
-
const { docs: docBatch, lastKey, bytes: chunkBytes } = await nextChunkPromise;
|
|
501
|
-
if (docBatch.length == 0) {
|
|
502
|
-
// No more data - stop iterating
|
|
503
|
-
break;
|
|
504
|
-
}
|
|
505
|
-
bytesReplicatedMetric.add(chunkBytes);
|
|
506
|
-
chunksReplicatedMetric.add(1);
|
|
507
|
-
|
|
508
|
-
if (this.abort_signal.aborted) {
|
|
509
|
-
throw new ReplicationAbortedError(`Aborted initial replication`, this.abort_signal.reason);
|
|
510
|
-
}
|
|
511
|
-
|
|
512
|
-
// Pre-fetch next batch, so that we can read and write concurrently
|
|
513
|
-
nextChunkPromise = query.nextChunk();
|
|
514
|
-
for (let buffer of docBatch) {
|
|
515
|
-
const { row: record, replicaId: replicaId } = this.rawToSqliteRow(buffer);
|
|
516
|
-
|
|
517
|
-
// This auto-flushes when the batch reaches its size limit
|
|
518
|
-
await batch.save({
|
|
519
|
-
tag: SaveOperationTag.INSERT,
|
|
520
|
-
sourceTable: table,
|
|
521
|
-
before: undefined,
|
|
522
|
-
beforeReplicaId: undefined,
|
|
523
|
-
after: record,
|
|
524
|
-
afterReplicaId: replicaId
|
|
525
|
-
});
|
|
526
|
-
}
|
|
527
|
-
|
|
528
|
-
// Important: flush before marking progress
|
|
529
|
-
await batch.flush();
|
|
530
|
-
at += docBatch.length;
|
|
531
|
-
rowsReplicatedMetric.add(docBatch.length);
|
|
532
|
-
|
|
533
|
-
table = await batch.updateTableProgress(table, {
|
|
534
|
-
lastKey,
|
|
535
|
-
replicatedCount: at,
|
|
536
|
-
totalEstimatedCount: totalEstimatedCount
|
|
537
|
-
});
|
|
538
|
-
this.relationCache.update(table);
|
|
539
|
-
|
|
540
|
-
const duration = performance.now() - lastBatch;
|
|
541
|
-
lastBatch = performance.now();
|
|
542
|
-
this.logger.info(
|
|
543
|
-
`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} in ${duration.toFixed(0)}ms`
|
|
544
|
-
);
|
|
545
|
-
this.touch();
|
|
546
|
-
}
|
|
547
|
-
// In case the loop was interrupted, make sure we await the last promise.
|
|
548
|
-
await nextChunkPromise;
|
|
549
|
-
}
|
|
550
|
-
|
|
551
|
-
private async getRelation(
|
|
218
|
+
private async getRelations(
|
|
552
219
|
batch: storage.BucketStorageBatch,
|
|
553
220
|
descriptor: SourceEntityDescriptor,
|
|
554
221
|
options: { snapshot: boolean }
|
|
555
|
-
): Promise<SourceTable> {
|
|
556
|
-
const existing = this.relationCache.
|
|
222
|
+
): Promise<SourceTable[]> {
|
|
223
|
+
const existing = this.relationCache.getAll(descriptor);
|
|
557
224
|
if (existing != null) {
|
|
225
|
+
// We do this even when it's an empty result: Empty means nothing to sync, and we don't need to re-resolve.
|
|
558
226
|
return existing;
|
|
559
227
|
}
|
|
560
228
|
|
|
@@ -612,14 +280,11 @@ export class ChangeStream {
|
|
|
612
280
|
}
|
|
613
281
|
|
|
614
282
|
const snapshot = options.snapshot;
|
|
615
|
-
const result = await
|
|
616
|
-
group_id: this.group_id,
|
|
283
|
+
const result = await batch.resolveTables({
|
|
617
284
|
connection_id: this.connection_id,
|
|
618
|
-
|
|
619
|
-
entity_descriptor: descriptor,
|
|
620
|
-
sync_rules: this.sync_rules
|
|
285
|
+
source: descriptor
|
|
621
286
|
});
|
|
622
|
-
this.relationCache.
|
|
287
|
+
this.relationCache.updateAll(descriptor, result.tables);
|
|
623
288
|
|
|
624
289
|
// Drop conflicting collections.
|
|
625
290
|
// This is generally not expected for MongoDB source dbs, so we log an error.
|
|
@@ -634,20 +299,13 @@ export class ChangeStream {
|
|
|
634
299
|
// 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere)
|
|
635
300
|
// 2. Snapshot is not already done, AND:
|
|
636
301
|
// 3. The table is used in sync config.
|
|
637
|
-
const
|
|
638
|
-
if (
|
|
302
|
+
const snapshotCandidates = result.tables.filter((table) => snapshot && !table.snapshotComplete && table.syncAny);
|
|
303
|
+
if (snapshotCandidates.length > 0) {
|
|
639
304
|
this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`);
|
|
640
|
-
|
|
641
|
-
await batch.truncate([result.table]);
|
|
642
|
-
|
|
643
|
-
await this.snapshotTable(batch, result.table);
|
|
644
|
-
const no_checkpoint_before_lsn = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);
|
|
645
|
-
|
|
646
|
-
const [table] = await batch.markTableSnapshotDone([result.table], no_checkpoint_before_lsn);
|
|
647
|
-
return table;
|
|
305
|
+
await this.snapshotter.snapshotTables(batch, snapshotCandidates);
|
|
648
306
|
}
|
|
649
307
|
|
|
650
|
-
return result.
|
|
308
|
+
return result.tables;
|
|
651
309
|
}
|
|
652
310
|
|
|
653
311
|
async writeChange(
|
|
@@ -706,38 +364,65 @@ export class ChangeStream {
|
|
|
706
364
|
}
|
|
707
365
|
|
|
708
366
|
async replicate() {
|
|
367
|
+
let streamPromise: Promise<void> | null = null;
|
|
368
|
+
let loopPromise: Promise<void> | null = null;
|
|
709
369
|
try {
|
|
710
370
|
// If anything errors here, the entire replication process is halted, and
|
|
711
371
|
// all connections automatically closed, including this one.
|
|
712
|
-
|
|
713
|
-
await this.
|
|
372
|
+
this.initPromise = this.initReplication();
|
|
373
|
+
await this.initPromise;
|
|
374
|
+
loopPromise = this.snapshotter
|
|
375
|
+
.replicationLoop()
|
|
376
|
+
.then(() => {
|
|
377
|
+
throw new ReplicationAssertionError(`Replication snapshotter exited unexpectedly`);
|
|
378
|
+
})
|
|
379
|
+
.catch((e) => {
|
|
380
|
+
this.abortController.abort(e);
|
|
381
|
+
throw e;
|
|
382
|
+
});
|
|
383
|
+
if (!this.snapshotter.supportsConcurrentSnapshots) {
|
|
384
|
+
await Promise.race([this.snapshotter.waitForInitialSnapshot(), loopPromise]);
|
|
385
|
+
}
|
|
386
|
+
streamPromise = this.streamChanges()
|
|
387
|
+
.then(() => {
|
|
388
|
+
throw new ReplicationAssertionError(`Replication stream exited unexpectedly`);
|
|
389
|
+
})
|
|
390
|
+
.catch((e) => {
|
|
391
|
+
this.abortController.abort(e);
|
|
392
|
+
throw e;
|
|
393
|
+
});
|
|
394
|
+
|
|
395
|
+
const results = await Promise.allSettled([loopPromise, streamPromise]);
|
|
396
|
+
throw replicationLoopError(results);
|
|
714
397
|
} catch (e) {
|
|
715
398
|
await this.storage.reportError(e);
|
|
716
399
|
throw e;
|
|
400
|
+
} finally {
|
|
401
|
+
this.abortController.abort();
|
|
402
|
+
}
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
public async waitForInitialSnapshot() {
|
|
406
|
+
if (this.initPromise == null) {
|
|
407
|
+
throw new ReplicationAssertionError('replicate() must be called before waitForInitialSnapshot()');
|
|
717
408
|
}
|
|
409
|
+
await this.initPromise;
|
|
410
|
+
await this.snapshotter.waitForInitialSnapshot();
|
|
718
411
|
}
|
|
719
412
|
|
|
720
|
-
async initReplication() {
|
|
721
|
-
const result = await this.
|
|
722
|
-
await this.setupCheckpointsCollection();
|
|
413
|
+
private async initReplication() {
|
|
414
|
+
const result = await this.snapshotter.checkSlot();
|
|
415
|
+
await this.snapshotter.setupCheckpointsCollection();
|
|
723
416
|
if (result.needsInitialSync) {
|
|
724
417
|
if (result.snapshotLsn == null) {
|
|
725
418
|
// Snapshot LSN is not present, so we need to start replication from scratch.
|
|
726
|
-
await this.storage.clear({ signal: this.
|
|
727
|
-
}
|
|
728
|
-
const { lastOpId } = await this.initialReplication(result.snapshotLsn);
|
|
729
|
-
if (lastOpId != null) {
|
|
730
|
-
// Populate the cache _after_ initial replication, but _before_ we switch to this replication stream.
|
|
731
|
-
await this.storage.populatePersistentChecksumCache({
|
|
732
|
-
signal: this.abort_signal,
|
|
733
|
-
// No checkpoint yet, but we do have the opId.
|
|
734
|
-
maxOpId: lastOpId
|
|
735
|
-
});
|
|
419
|
+
await this.storage.clear({ signal: this.abortSignal });
|
|
736
420
|
}
|
|
421
|
+
await this.snapshotter.queueSnapshotTables(result.snapshotLsn);
|
|
737
422
|
}
|
|
738
423
|
}
|
|
739
424
|
|
|
740
|
-
async streamChanges() {
|
|
425
|
+
private async streamChanges() {
|
|
741
426
|
try {
|
|
742
427
|
await this.streamChangesInternal();
|
|
743
428
|
} catch (e) {
|
|
@@ -830,7 +515,9 @@ export class ChangeStream {
|
|
|
830
515
|
const bytesReplicatedMetric = this.metrics.getCounter(ReplicationMetric.DATA_REPLICATED_BYTES);
|
|
831
516
|
const chunksReplicatedMetric = this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED);
|
|
832
517
|
|
|
833
|
-
const tracer = new PerformanceTracer
|
|
518
|
+
const tracer = new PerformanceTracer<
|
|
519
|
+
'storage' | 'evaluate' | 'batch' | 'source_checkpoint' | 'changestream' | 'processing'
|
|
520
|
+
>('MongoDB streaming replication');
|
|
834
521
|
await this.storage.startBatch(
|
|
835
522
|
{
|
|
836
523
|
logger: this.logger,
|
|
@@ -838,6 +525,7 @@ export class ChangeStream {
|
|
|
838
525
|
defaultSchema: this.defaultDb.databaseName,
|
|
839
526
|
// We get a complete postimage for every change, so we don't need to store the current data.
|
|
840
527
|
storeCurrentData: false,
|
|
528
|
+
hooks: this.storageHooks,
|
|
841
529
|
tracer
|
|
842
530
|
},
|
|
843
531
|
async (batch) => {
|
|
@@ -860,7 +548,7 @@ export class ChangeStream {
|
|
|
860
548
|
const batchStream = this.rawChangeStreamBatches({
|
|
861
549
|
lsn: resumeFromLsn,
|
|
862
550
|
filters,
|
|
863
|
-
signal: this.
|
|
551
|
+
signal: this.abortSignal,
|
|
864
552
|
tracer
|
|
865
553
|
});
|
|
866
554
|
|
|
@@ -886,7 +574,7 @@ export class ChangeStream {
|
|
|
886
574
|
|
|
887
575
|
bytesReplicatedMetric.add(eventBatch.byteSize);
|
|
888
576
|
chunksReplicatedMetric.add(1);
|
|
889
|
-
if (this.
|
|
577
|
+
if (this.abortSignal.aborted) {
|
|
890
578
|
break;
|
|
891
579
|
}
|
|
892
580
|
this.touch();
|
|
@@ -920,7 +608,7 @@ export class ChangeStream {
|
|
|
920
608
|
for (let eventIndex = 0; eventIndex < events.length; eventIndex++) {
|
|
921
609
|
const rawChangeDocument = events[eventIndex];
|
|
922
610
|
const originalChangeDocument = parseChangeDocument(rawChangeDocument);
|
|
923
|
-
if (this.
|
|
611
|
+
if (this.abortSignal.aborted) {
|
|
924
612
|
break;
|
|
925
613
|
}
|
|
926
614
|
|
|
@@ -1027,11 +715,16 @@ export class ChangeStream {
|
|
|
1027
715
|
// change stream events, collapse standalone checkpoints into the normal batch
|
|
1028
716
|
// checkpoint flow to avoid commit churn under sustained load.
|
|
1029
717
|
const hasBufferedChanges = eventIndex < events.length - 1;
|
|
1030
|
-
if (waitForCheckpointLsn
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
718
|
+
if (hasBufferedChanges && waitForCheckpointLsn == null) {
|
|
719
|
+
// Buffered changes - create a new batch checkpoint to rate limit commits
|
|
720
|
+
using _ = tracer.span('source_checkpoint');
|
|
721
|
+
waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
|
|
1034
722
|
continue;
|
|
723
|
+
} else if (waitForCheckpointLsn != null) {
|
|
724
|
+
// Skip this checkpoint - wait for the batch checkpoint.
|
|
725
|
+
continue;
|
|
726
|
+
} else {
|
|
727
|
+
// No buffered changes, and no batch checkpoint pending - commit immediately.
|
|
1035
728
|
}
|
|
1036
729
|
} else if (!this.checkpointStreamId.equals(checkpointId)) {
|
|
1037
730
|
continue;
|
|
@@ -1068,18 +761,20 @@ export class ChangeStream {
|
|
|
1068
761
|
changeDocument.operationType == 'delete'
|
|
1069
762
|
) {
|
|
1070
763
|
if (waitForCheckpointLsn == null) {
|
|
764
|
+
using _ = tracer.span('source_checkpoint');
|
|
1071
765
|
waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
|
|
1072
766
|
}
|
|
1073
767
|
|
|
1074
|
-
const rel = getMongoRelation(changeDocument.ns);
|
|
1075
|
-
const
|
|
768
|
+
const rel = getMongoRelation(changeDocument.ns, this.connections.connectionTag);
|
|
769
|
+
const tables = await this.getRelations(batch, rel, {
|
|
1076
770
|
// In most cases, we should not need to snapshot this. But if this is the first time we see the collection
|
|
1077
771
|
// for whatever reason, then we do need to snapshot it.
|
|
1078
772
|
// This may result in some duplicate operations when a collection is created for the first time after
|
|
1079
773
|
// sync config was deployed.
|
|
1080
774
|
snapshot: true
|
|
1081
775
|
});
|
|
1082
|
-
|
|
776
|
+
const tablesToReplicate = tables.filter((table) => table.syncAny);
|
|
777
|
+
if (tablesToReplicate.length > 0) {
|
|
1083
778
|
this.replicationLag.trackUncommittedChange(
|
|
1084
779
|
changeDocument.clusterTime == null ? null : timestampToDate(changeDocument.clusterTime)
|
|
1085
780
|
);
|
|
@@ -1094,29 +789,33 @@ export class ChangeStream {
|
|
|
1094
789
|
transactionsReplicatedMetric.add(1);
|
|
1095
790
|
}
|
|
1096
791
|
|
|
1097
|
-
|
|
792
|
+
for (const table of tablesToReplicate) {
|
|
793
|
+
await this.writeChange(batch, table, changeDocument);
|
|
794
|
+
}
|
|
1098
795
|
}
|
|
1099
796
|
} else if (changeDocument.operationType == 'drop') {
|
|
1100
|
-
const rel = getMongoRelation(changeDocument.ns);
|
|
1101
|
-
const
|
|
797
|
+
const rel = getMongoRelation(changeDocument.ns, this.connections.connectionTag);
|
|
798
|
+
const tables = await this.getRelations(batch, rel, {
|
|
1102
799
|
// We're "dropping" this collection, so never snapshot it.
|
|
1103
800
|
snapshot: false
|
|
1104
801
|
});
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
802
|
+
const tablesToDrop = tables.filter((table) => table.syncAny);
|
|
803
|
+
if (tablesToDrop.length > 0) {
|
|
804
|
+
await batch.drop(tablesToDrop);
|
|
1108
805
|
}
|
|
806
|
+
this.relationCache.delete(rel);
|
|
1109
807
|
} else if (changeDocument.operationType == 'rename') {
|
|
1110
|
-
const relFrom = getMongoRelation(changeDocument.ns);
|
|
1111
|
-
const relTo = getMongoRelation(changeDocument.to);
|
|
1112
|
-
const
|
|
808
|
+
const relFrom = getMongoRelation(changeDocument.ns, this.connections.connectionTag);
|
|
809
|
+
const relTo = getMongoRelation(changeDocument.to, this.connections.connectionTag);
|
|
810
|
+
const tablesFrom = await this.getRelations(batch, relFrom, {
|
|
1113
811
|
// We're "dropping" this collection, so never snapshot it.
|
|
1114
812
|
snapshot: false
|
|
1115
813
|
});
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
814
|
+
const tablesToDrop = tablesFrom.filter((table) => table.syncAny);
|
|
815
|
+
if (tablesToDrop.length > 0) {
|
|
816
|
+
await batch.drop(tablesToDrop);
|
|
1119
817
|
}
|
|
818
|
+
this.relationCache.delete(relFrom);
|
|
1120
819
|
// Here we do need to snapshot the new table
|
|
1121
820
|
const collection = await this.getCollectionInfo(relTo.schema, relTo.name);
|
|
1122
821
|
await this.handleRelation(batch, relTo, {
|
|
@@ -1139,8 +838,8 @@ export class ChangeStream {
|
|
|
1139
838
|
}
|
|
1140
839
|
|
|
1141
840
|
batchSpan.end();
|
|
1142
|
-
const
|
|
1143
|
-
const duration = batchSpan.
|
|
841
|
+
const durationsMicroseconds = outerSpan.end();
|
|
842
|
+
const duration = batchSpan.durationMillis;
|
|
1144
843
|
|
|
1145
844
|
this.logger.info(
|
|
1146
845
|
`Processed batch of ${events.length} changes / ${eventBatch.byteSize} bytes in ${duration}ms`,
|
|
@@ -1148,13 +847,15 @@ export class ChangeStream {
|
|
|
1148
847
|
count: events.length,
|
|
1149
848
|
bytes: eventBatch.byteSize,
|
|
1150
849
|
duration,
|
|
1151
|
-
t:
|
|
850
|
+
t: durationsMicroseconds
|
|
1152
851
|
}
|
|
1153
852
|
);
|
|
1154
853
|
outerSpan = tracer.span('batch');
|
|
1155
854
|
}
|
|
1156
855
|
}
|
|
1157
856
|
);
|
|
857
|
+
|
|
858
|
+
throw new ReplicationAbortedError(`Replication stream aborted`, this.abortSignal.reason);
|
|
1158
859
|
}
|
|
1159
860
|
|
|
1160
861
|
getReplicationLagMillis(): number | undefined {
|
|
@@ -1183,3 +884,26 @@ function transactionKey(doc: Pick<mongo.ChangeStreamDocument, 'lsid' | 'txnNumbe
|
|
|
1183
884
|
}
|
|
1184
885
|
return `${doc.lsid.id.toString('hex')}:${doc.txnNumber}`;
|
|
1185
886
|
}
|
|
887
|
+
|
|
888
|
+
/**
 * Selects the single error to report from a set of settled replication promises.
 *
 * A failure on one of the promises aborts the other, which then rejects with a
 * ReplicationAbortedError. Reporting that secondary error would mask the root cause, so
 * any rejection that is not a ReplicationAbortedError wins over one that is.
 *
 * @param results settled results to inspect, in priority order
 * @returns the reason of the first rejection that is not a ReplicationAbortedError; otherwise
 *   the reason of the first rejection; otherwise a new ReplicationAssertionError when nothing
 *   was rejected at all
 */
function replicationLoopError(results: PromiseSettledResult<any>[]): unknown {
  // Keep the rejected entries only, preserving their original order.
  const failures = results.filter((entry): entry is PromiseRejectedResult => entry.status == 'rejected');

  // Prefer a root cause over the abort error it triggered on the other promise.
  const rootCause = failures.find((failure) => !(failure.reason instanceof ReplicationAbortedError));

  // When every rejection is a ReplicationAbortedError, fall back to the first of them.
  const selected = rootCause ?? failures[0];
  if (selected != null) {
    return selected.reason;
  }

  // Nothing was rejected - not expected in practice, but still reported as an error.
  return new ReplicationAssertionError(`Replication loop exited unexpectedly`);
}
|