@powersync/service-module-mongodb 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/dist/api/MongoRouteAPIAdapter.d.ts +1 -2
- package/dist/api/MongoRouteAPIAdapter.js +3 -6
- package/dist/api/MongoRouteAPIAdapter.js.map +1 -1
- package/dist/replication/ChangeStream.d.ts +27 -11
- package/dist/replication/ChangeStream.js +565 -288
- package/dist/replication/ChangeStream.js.map +1 -1
- package/dist/replication/ChangeStreamReplicationJob.d.ts +2 -0
- package/dist/replication/ChangeStreamReplicationJob.js +13 -5
- package/dist/replication/ChangeStreamReplicationJob.js.map +1 -1
- package/dist/replication/ChangeStreamReplicator.d.ts +1 -0
- package/dist/replication/ChangeStreamReplicator.js +21 -0
- package/dist/replication/ChangeStreamReplicator.js.map +1 -1
- package/dist/replication/MongoRelation.d.ts +9 -2
- package/dist/replication/MongoRelation.js +16 -5
- package/dist/replication/MongoRelation.js.map +1 -1
- package/dist/replication/MongoSnapshotQuery.d.ts +26 -0
- package/dist/replication/MongoSnapshotQuery.js +56 -0
- package/dist/replication/MongoSnapshotQuery.js.map +1 -0
- package/dist/replication/replication-utils.d.ts +2 -0
- package/dist/replication/replication-utils.js +3 -0
- package/dist/replication/replication-utils.js.map +1 -1
- package/package.json +9 -9
- package/src/api/MongoRouteAPIAdapter.ts +3 -7
- package/src/replication/ChangeStream.ts +371 -135
- package/src/replication/ChangeStreamReplicationJob.ts +14 -6
- package/src/replication/ChangeStreamReplicator.ts +23 -0
- package/src/replication/MongoRelation.ts +21 -6
- package/src/replication/MongoSnapshotQuery.ts +59 -0
- package/src/replication/replication-utils.ts +5 -0
- package/test/src/change_stream.test.ts +18 -13
- package/test/src/change_stream_utils.ts +47 -22
- package/test/src/chunked_snapshot.test.ts +153 -0
- package/test/src/resume.test.ts +7 -94
- package/test/src/resume_token.test.ts +78 -2
- package/test/src/resuming_snapshots.test.ts +138 -0
- package/test/src/slow_tests.test.ts +4 -18
- package/test/src/util.ts +12 -1
- package/tsconfig.tsbuildinfo +1 -1
package/src/replication/ChangeStream.ts

@@ -2,21 +2,36 @@ import { isMongoNetworkTimeoutError, isMongoServerError, mongo } from '@powersyn
 import {
   container,
   DatabaseConnectionError,
+  logger as defaultLogger,
   ErrorCode,
-  logger,
+  Logger,
   ReplicationAbortedError,
   ReplicationAssertionError,
   ServiceError
 } from '@powersync/lib-services-framework';
-import {
+import {
+  MetricsEngine,
+  RelationCache,
+  SaveOperationTag,
+  SourceEntityDescriptor,
+  SourceTable,
+  storage
+} from '@powersync/service-core';
 import { DatabaseInputRow, SqliteRow, SqlSyncRules, TablePattern } from '@powersync/service-sync-rules';
 import { ReplicationMetric } from '@powersync/service-types';
 import { MongoLSN } from '../common/MongoLSN.js';
 import { PostImagesOption } from '../types/types.js';
 import { escapeRegExp } from '../utils.js';
 import { MongoManager } from './MongoManager.js';
-import {
-
+import {
+  constructAfterRecord,
+  createCheckpoint,
+  getCacheIdentifier,
+  getMongoRelation,
+  STANDALONE_CHECKPOINT_ID
+} from './MongoRelation.js';
+import { ChunkedSnapshotQuery } from './MongoSnapshotQuery.js';
+import { CHECKPOINTS_COLLECTION, timestampToDate } from './replication-utils.js';
 
 export interface ChangeStreamOptions {
   connections: MongoManager;
@@ -30,10 +45,18 @@ export interface ChangeStreamOptions {
    * in closing the stream. To cover that case, reduce the timeout for tests.
    */
   maxAwaitTimeMS?: number;
+
+  /**
+   * Override snapshotChunkLength for testing.
+   */
+  snapshotChunkLength?: number;
+
+  logger?: Logger;
 }
 
 interface InitResult {
   needsInitialSync: boolean;
+  snapshotLsn: string | null;
 }
 
 /**
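
Both additions to ChangeStreamOptions are aimed at tests and embedding: snapshotChunkLength shrinks snapshot chunks, and logger scopes log output per replication job. A sketch of how a caller might override them (illustrative values only; the connections/storage wiring comes from this package's test utilities and is elided):

```ts
import { ChangeStreamOptions } from './ChangeStream.js';

// Illustrative values, not taken from the package's own tests.
const testOverrides: Pick<ChangeStreamOptions, 'maxAwaitTimeMS' | 'snapshotChunkLength'> = {
  // Close idle change streams quickly so test teardown is fast.
  maxAwaitTimeMS: 50,
  // Force many small snapshot chunks to exercise the chunked/resumable snapshot path.
  snapshotChunkLength: 2
};
```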
@@ -67,7 +90,24 @@ export class ChangeStream {
 
   private abort_signal: AbortSignal;
 
-  private
+  private relationCache = new RelationCache(getCacheIdentifier);
+
+  /**
+   * Time of the oldest uncommitted change, according to the source db.
+   * This is used to determine the replication lag.
+   */
+  private oldestUncommittedChange: Date | null = null;
+  /**
+   * Keep track of whether we have done a commit or keepalive yet.
+   * We can only compute replication lag if isStartingReplication == false, or oldestUncommittedChange is present.
+   */
+  private isStartingReplication = true;
+
+  private checkpointStreamId = new mongo.ObjectId();
+
+  private logger: Logger;
+
+  private snapshotChunkLength: number;
 
   constructor(options: ChangeStreamOptions) {
     this.storage = options.storage;
@@ -75,6 +115,7 @@ export class ChangeStream {
     this.group_id = options.storage.group_id;
     this.connections = options.connections;
     this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 10_000;
+    this.snapshotChunkLength = options.snapshotChunkLength ?? 6_000;
     this.client = this.connections.client;
     this.defaultDb = this.connections.db;
     this.sync_rules = options.storage.getParsedSyncRules({
@@ -89,6 +130,8 @@ export class ChangeStream {
       },
       { once: true }
     );
+
+    this.logger = options.logger ?? defaultLogger;
   }
 
   get stopped() {
@@ -103,10 +146,6 @@ export class ChangeStream {
     return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE;
   }
 
-  private get logPrefix() {
-    return `[powersync_${this.group_id}]`;
-  }
-
   /**
    * This resolves a pattern, persists the related metadata, and returns
    * the resulting SourceTables.
@@ -142,7 +181,7 @@ export class ChangeStream {
       .toArray();
 
     if (!tablePattern.isWildcard && collections.length == 0) {
-      logger.warn(`${this.logPrefix} Collection ${schema}.${tablePattern.name} not found`);
+      this.logger.warn(`Collection ${schema}.${tablePattern.name} not found`);
     }
 
     for (let collection of collections) {
@@ -162,37 +201,26 @@ export class ChangeStream {
   async initSlot(): Promise<InitResult> {
     const status = await this.storage.getStatus();
     if (status.snapshot_done && status.checkpoint_lsn) {
-      logger.info(`${this.logPrefix} Initial replication already done`);
-      return { needsInitialSync: false };
+      this.logger.info(`Initial replication already done`);
+      return { needsInitialSync: false, snapshotLsn: null };
     }
 
-    return { needsInitialSync: true };
+    return { needsInitialSync: true, snapshotLsn: status.snapshot_lsn };
   }
 
   async estimatedCount(table: storage.SourceTable): Promise<string> {
-    const db = this.client.db(table.schema);
-    const count = await db.collection(table.table).estimatedDocumentCount();
+    const count = await this.estimatedCountNumber(table);
     return `~${count}`;
   }
 
-  /**
-   * Start initial replication.
-   *
-   * If (partial) replication was done before on this slot, this clears the state
-   * and starts again from scratch.
-   */
-  async startInitialReplication() {
-    await this.storage.clear();
-    await this.initialReplication();
+  async estimatedCountNumber(table: storage.SourceTable): Promise<number> {
+    const db = this.client.db(table.schema);
+    return await db.collection(table.table).estimatedDocumentCount();
   }
 
-  async initialReplication() {
-    const sourceTables = this.sync_rules.getSourceTables();
-    await this.client.connect();
-
-    // We need to get the snapshot time before taking the initial snapshot.
+  private async getSnapshotLsn(): Promise<string> {
     const hello = await this.defaultDb.command({ hello: 1 });
-    const snapshotTime = hello.lastWrite?.majorityOpTime?.ts as mongo.Timestamp;
+    // Basic sanity check
     if (hello.msg == 'isdbgrid') {
       throw new ServiceError(
         ErrorCode.PSYNC_S1341,
@@ -203,33 +231,139 @@ export class ChangeStream {
         ErrorCode.PSYNC_S1342,
         'Standalone MongoDB instances are not supported - use a replicaset.'
       );
-    } else if (snapshotTime == null) {
-      // Not known where this would happen apart from the above cases
-      throw new ReplicationAssertionError('MongoDB lastWrite timestamp not found.');
     }
 
+    // Open a change stream just to get a resume token for later use.
+    // We could use clusterTime from the hello command, but that won't tell us if the
+    // snapshot isn't valid anymore.
+    // If we just use the first resumeToken from the stream, we get two potential issues:
+    // 1. The resumeToken may just be a wrapped clusterTime, which does not detect changes
+    //    in source db or other stream issues.
+    // 2. The first actual change we get may have the same clusterTime, causing us to incorrectly
+    //    skip that event.
+    // Instead, we create a new checkpoint document, and wait until we get that document back in the stream.
+    // To avoid potential race conditions with the checkpoint creation, we create a new checkpoint document
+    // periodically until the timeout is reached.
+
+    const LSN_TIMEOUT_SECONDS = 60;
+    const LSN_CREATE_INTERVAL_SECONDS = 1;
+
+    await using streamManager = this.openChangeStream({ lsn: null, maxAwaitTimeMs: 0 });
+    const { stream } = streamManager;
+    const startTime = performance.now();
+    let lastCheckpointCreated = -10_000;
+    let eventsSeen = 0;
+
+    while (performance.now() - startTime < LSN_TIMEOUT_SECONDS * 1000) {
+      if (performance.now() - lastCheckpointCreated >= LSN_CREATE_INTERVAL_SECONDS * 1000) {
+        await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
+        lastCheckpointCreated = performance.now();
+      }
+
+      // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream
+      const changeDocument = await stream.tryNext().catch((e) => {
+        throw mapChangeStreamError(e);
+      });
+      if (changeDocument == null) {
+        continue;
+      }
+
+      const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
+
+      if (ns?.coll == CHECKPOINTS_COLLECTION && 'documentKey' in changeDocument) {
+        const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId;
+        if (!this.checkpointStreamId.equals(checkpointId)) {
+          continue;
+        }
+        const { comparable: lsn } = new MongoLSN({
+          timestamp: changeDocument.clusterTime!,
+          resume_token: changeDocument._id
+        });
+        return lsn;
+      }
+
+      eventsSeen += 1;
+    }
+
+    // Could happen if there is a very large replication lag?
+    throw new ServiceError(
+      ErrorCode.PSYNC_S1301,
+      `Timeout while waiting for checkpoint document for ${LSN_TIMEOUT_SECONDS}s. Streamed events = ${eventsSeen}`
+    );
+  }
+
+  private async validateSnapshotLsn(lsn: string) {
+    await using streamManager = this.openChangeStream({ lsn: lsn, maxAwaitTimeMs: 0 });
+    const { stream } = streamManager;
+    try {
+      // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream
+      await stream.tryNext();
+    } catch (e) {
+      // Note: A timeout here is not handled as a ChangeStreamInvalidatedError, even though
+      // we possibly cannot recover from it.
+      throw mapChangeStreamError(e);
+    }
+  }
+
+  async initialReplication(snapshotLsn: string | null) {
+    const sourceTables = this.sync_rules.getSourceTables();
+    await this.client.connect();
+
     await this.storage.startBatch(
-      {
+      {
+        logger: this.logger,
+        zeroLSN: MongoLSN.ZERO.comparable,
+        defaultSchema: this.defaultDb.databaseName,
+        storeCurrentData: false,
+        skipExistingRows: true
+      },
       async (batch) => {
+        if (snapshotLsn == null) {
+          // First replication attempt - get a snapshot and store the timestamp
+          snapshotLsn = await this.getSnapshotLsn();
+          await batch.setSnapshotLsn(snapshotLsn);
+          this.logger.info(`Marking snapshot at ${snapshotLsn}`);
+        } else {
+          this.logger.info(`Resuming snapshot at ${snapshotLsn}`);
+          // Check that the snapshot is still valid.
+          await this.validateSnapshotLsn(snapshotLsn);
+        }
+
        // Start by resolving all tables.
        // This checks postImage configuration, and that should fail as
-        //
+        // early as possible.
        let allSourceTables: SourceTable[] = [];
        for (let tablePattern of sourceTables) {
          const tables = await this.resolveQualifiedTableNames(batch, tablePattern);
          allSourceTables.push(...tables);
        }
 
+        let tablesWithStatus: SourceTable[] = [];
        for (let table of allSourceTables) {
+          if (table.snapshotComplete) {
+            this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`);
+            continue;
+          }
+          let count = await this.estimatedCountNumber(table);
+          const updated = await batch.updateTableProgress(table, {
+            totalEstimatedCount: count
+          });
+          tablesWithStatus.push(updated);
+          this.relationCache.update(updated);
+          this.logger.info(
+            `To replicate: ${table.qualifiedName}: ${updated.snapshotStatus?.replicatedCount}/~${updated.snapshotStatus?.totalEstimatedCount}`
+          );
+        }
+
+        for (let table of tablesWithStatus) {
          await this.snapshotTable(batch, table);
          await batch.markSnapshotDone([table], MongoLSN.ZERO.comparable);
 
          await touch();
        }
 
-
-
-        await batch.commit(lsn);
+        this.logger.info(`Snapshot commit at ${snapshotLsn}`);
+        await batch.commit(snapshotLsn);
      }
    );
  }
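
The comment block in the hunk above describes the core trick: rather than trusting the stream's first resume token, getSnapshotLsn() writes its own checkpoint document and waits to see it come back. Stripped of the service's error mapping and timeout handling, the idea looks roughly like this - a sketch against the plain `mongodb` driver, where `'checkpoints'` stands in for CHECKPOINTS_COLLECTION and createCheckpoint() (MongoRelation.ts, not shown in this diff) is reduced to a bare insert:

```ts
import { Db, ObjectId, ResumeToken, Timestamp } from 'mongodb';

// Sketch only: write a checkpoint document, then wait until that exact
// document arrives on the change stream. Its clusterTime and resume token
// together identify a consistent LSN to snapshot from.
async function snapshotLsnSketch(db: Db): Promise<{ clusterTime: Timestamp; resumeToken: ResumeToken }> {
  const streamId = new ObjectId();
  const stream = db.watch([], { maxAwaitTimeMS: 0 });
  try {
    await db.collection('checkpoints').insertOne({ _id: streamId });
    while (true) {
      // The production code above adds a 60s timeout and re-creates the
      // checkpoint document every second to avoid race conditions.
      const doc = await stream.tryNext();
      if (doc == null) {
        continue;
      }
      if ('documentKey' in doc && streamId.equals(doc.documentKey._id)) {
        return { clusterTime: doc.clusterTime!, resumeToken: doc._id };
      }
    }
  } finally {
    await stream.close();
  }
}
```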
@@ -247,6 +381,11 @@ export class ChangeStream {
       await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, {
         changeStreamPreAndPostImages: { enabled: true }
       });
+    } else {
+      // Clear the collection on startup, to keep it clean
+      // We never query this collection directly, and don't want to keep the data around.
+      // We only use this to get data into the oplog/changestream.
+      await this.defaultDb.collection(CHECKPOINTS_COLLECTION).deleteMany({});
     }
   }
 
@@ -290,27 +429,38 @@ export class ChangeStream {
   }
 
   private async snapshotTable(batch: storage.BucketStorageBatch, table: storage.SourceTable) {
-
-
-    let at = 0;
+    const totalEstimatedCount = await this.estimatedCountNumber(table);
+    let at = table.snapshotStatus?.replicatedCount ?? 0;
     const db = this.client.db(table.schema);
     const collection = db.collection(table.table);
-
+    await using query = new ChunkedSnapshotQuery({
+      collection,
+      key: table.snapshotStatus?.lastKey,
+      batchSize: this.snapshotChunkLength
+    });
+    if (query.lastKey != null) {
+      this.logger.info(
+        `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming at _id > ${query.lastKey}`
+      );
+    } else {
+      this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
+    }
 
     let lastBatch = performance.now();
-
-
-
-
-
-
+    let nextChunkPromise = query.nextChunk();
+    while (true) {
+      const { docs: docBatch, lastKey } = await nextChunkPromise;
+      if (docBatch.length == 0) {
+        break;
+      }
+
+      if (this.abort_signal.aborted) {
+        throw new ReplicationAbortedError(`Aborted initial replication`);
+      }
+
       // Pre-fetch next batch, so that we can read and write concurrently
-
+      nextChunkPromise = query.nextChunk();
       for (let document of docBatch) {
-        if (this.abort_signal.aborted) {
-          throw new ReplicationAbortedError(`Aborted initial replication`);
-        }
-
         const record = constructAfterRecord(document);
 
         // This auto-flushes when the batch reaches its size limit
@@ -324,20 +474,27 @@ export class ChangeStream {
        });
      }
 
+      // Important: flush before marking progress
+      await batch.flush();
      at += docBatch.length;
      this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(docBatch.length);
+
+      table = await batch.updateTableProgress(table, {
+        lastKey,
+        replicatedCount: at,
+        totalEstimatedCount: totalEstimatedCount
+      });
+      this.relationCache.update(table);
+
      const duration = performance.now() - lastBatch;
      lastBatch = performance.now();
-      logger.info(
-
+      this.logger.info(
+        `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} in ${duration.toFixed(0)}ms`
      );
      await touch();
    }
    // In case the loop was interrupted, make sure we await the last promise.
-    await
-
-    await batch.flush();
-    logger.info(`${this.logPrefix} Replicated ${at} documents for ${table.qualifiedName}`);
+    await nextChunkPromise;
  }
 
  private async getRelation(
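
ChunkedSnapshotQuery itself lives in the new MongoSnapshotQuery.ts (listed above but not shown in this diff). Assuming it chunks on `_id` as the progress key, the resumable-chunk idea can be sketched with a plain ordered query:

```ts
import { Collection } from 'mongodb';

// Sketch of the chunking idea: read in _id order, one bounded chunk at a
// time, and remember the last _id so an interrupted snapshot can resume
// with `_id > lastKey` instead of starting over.
async function* chunkedSnapshotSketch(collection: Collection, chunkLength: number, resumeKey?: any) {
  let lastKey = resumeKey;
  while (true) {
    const filter = lastKey == null ? {} : { _id: { $gt: lastKey } };
    const docs = await collection.find(filter).sort({ _id: 1 }).limit(chunkLength).toArray();
    if (docs.length == 0) {
      return;
    }
    lastKey = docs[docs.length - 1]._id;
    yield { docs, lastKey };
  }
}
```

Note the ordering enforced in the hunk above: the batch is flushed before `lastKey` and `replicatedCount` are persisted via updateTableProgress(), so a crash can at worst re-replicate the current chunk, never skip one.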
@@ -345,8 +502,7 @@ export class ChangeStream {
     descriptor: SourceEntityDescriptor,
     options: { snapshot: boolean }
   ): Promise<SourceTable> {
-    const
-    const existing = this.relation_cache.get(cacheId);
+    const existing = this.relationCache.get(descriptor);
     if (existing != null) {
       return existing;
     }
@@ -386,7 +542,7 @@ export class ChangeStream {
         collMod: collectionInfo.name,
         changeStreamPreAndPostImages: { enabled: true }
       });
-      logger.info(`${this.logPrefix} Enabled postImages on ${db}.${collectionInfo.name}`);
+      this.logger.info(`Enabled postImages on ${db}.${collectionInfo.name}`);
     } else if (!enabled) {
       throw new ServiceError(ErrorCode.PSYNC_S1343, `postImages not enabled on ${db}.${collectionInfo.name}`);
     }
@@ -412,12 +568,12 @@ export class ChangeStream {
       entity_descriptor: descriptor,
       sync_rules: this.sync_rules
     });
-    this.
+    this.relationCache.update(result.table);
 
     // Drop conflicting collections.
     // This is generally not expected for MongoDB source dbs, so we log an error.
     if (result.dropTables.length > 0) {
-      logger.error(
+      this.logger.error(
         `Conflicting collections found for ${JSON.stringify(descriptor)}. Dropping: ${result.dropTables.map((t) => t.id).join(', ')}`
       );
       await batch.drop(result.dropTables);
@@ -429,12 +585,12 @@ export class ChangeStream {
     // 3. The table is used in sync rules.
     const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny;
     if (shouldSnapshot) {
-      logger.info(`${this.logPrefix} New collection: ${descriptor.schema}.${descriptor.name}`);
+      this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`);
       // Truncate this table, in case a previous snapshot was interrupted.
       await batch.truncate([result.table]);
 
       await this.snapshotTable(batch, result.table);
-      const no_checkpoint_before_lsn = await createCheckpoint(this.client, this.defaultDb);
+      const no_checkpoint_before_lsn = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);
 
       const [table] = await batch.markSnapshotDone([result.table], no_checkpoint_before_lsn);
       return table;
@@ -449,7 +605,7 @@ export class ChangeStream {
     change: mongo.ChangeStreamDocument
   ): Promise<storage.FlushedResult | null> {
     if (!table.syncAny) {
-      logger.debug(`${this.logPrefix} Collection ${table.qualifiedName} not used in sync rules - skipping`);
+      this.logger.debug(`Collection ${table.qualifiedName} not used in sync rules - skipping`);
       return null;
     }
 
@@ -512,7 +668,11 @@ export class ChangeStream {
     const result = await this.initSlot();
     await this.setupCheckpointsCollection();
     if (result.needsInitialSync) {
-      await this.startInitialReplication();
+      if (result.snapshotLsn == null) {
+        // Snapshot LSN is not present, so we need to start replication from scratch.
+        await this.storage.clear({ signal: this.abort_signal });
+      }
+      await this.initialReplication(result.snapshotLsn);
     }
   }
 
@@ -531,77 +691,107 @@ export class ChangeStream {
     }
   }
 
+  private openChangeStream(options: { lsn: string | null; maxAwaitTimeMs?: number }) {
+    const lastLsn = options.lsn ? MongoLSN.fromSerialized(options.lsn) : null;
+    const startAfter = lastLsn?.timestamp;
+    const resumeAfter = lastLsn?.resumeToken;
+
+    const filters = this.getSourceNamespaceFilters();
+
+    const pipeline: mongo.Document[] = [
+      {
+        $match: filters.$match
+      },
+      { $changeStreamSplitLargeEvent: {} }
+    ];
+
+    let fullDocument: 'required' | 'updateLookup';
+
+    if (this.usePostImages) {
+      // 'read_only' or 'auto_configure'
+      // Configuration happens during snapshot, or when we see new
+      // collections.
+      fullDocument = 'required';
+    } else {
+      fullDocument = 'updateLookup';
+    }
+
+    const streamOptions: mongo.ChangeStreamOptions = {
+      showExpandedEvents: true,
+      maxAwaitTimeMS: options.maxAwaitTimeMs ?? this.maxAwaitTimeMS,
+      fullDocument: fullDocument
+    };
+
+    /**
+     * Only one of these options can be supplied at a time.
+     */
+    if (resumeAfter) {
+      streamOptions.resumeAfter = resumeAfter;
+    } else {
+      // Legacy: We don't persist lsns without resumeTokens anymore, but we do still handle the
+      // case if we have an old one.
+      streamOptions.startAtOperationTime = startAfter;
+    }
+
+    let stream: mongo.ChangeStream<mongo.Document>;
+    if (filters.multipleDatabases) {
+      // Requires readAnyDatabase@admin on Atlas
+      stream = this.client.watch(pipeline, streamOptions);
+    } else {
+      // Same general result, but requires less permissions than the above
+      stream = this.defaultDb.watch(pipeline, streamOptions);
+    }
+
+    this.abort_signal.addEventListener('abort', () => {
+      stream.close();
+    });
+
+    return {
+      stream,
+      filters,
+      [Symbol.asyncDispose]: async () => {
+        return stream.close();
+      }
+    };
+  }
+
   async streamChangesInternal() {
     // Auto-activate as soon as initial replication is done
     await this.storage.autoActivate();
 
     await this.storage.startBatch(
-      {
+      {
+        logger: this.logger,
+        zeroLSN: MongoLSN.ZERO.comparable,
+        defaultSchema: this.defaultDb.databaseName,
+        storeCurrentData: false
+      },
       async (batch) => {
         const { lastCheckpointLsn } = batch;
-        const lastLsn =
+        const lastLsn = MongoLSN.fromSerialized(lastCheckpointLsn!);
         const startAfter = lastLsn?.timestamp;
-        const resumeAfter = lastLsn?.resumeToken;
 
-
+        // It is normal for this to be a minute or two old when there is a low volume
+        // of ChangeStream events.
+        const tokenAgeSeconds = Math.round((Date.now() - timestampToDate(startAfter).getTime()) / 1000);
 
-
-
-        const pipeline: mongo.Document[] = [
-          {
-            $match: filters.$match
-          },
-          { $changeStreamSplitLargeEvent: {} }
-        ];
-
-        let fullDocument: 'required' | 'updateLookup';
-
-        if (this.usePostImages) {
-          // 'read_only' or 'auto_configure'
-          // Configuration happens during snapshot, or when we see new
-          // collections.
-          fullDocument = 'required';
-        } else {
-          fullDocument = 'updateLookup';
-        }
-
-        const streamOptions: mongo.ChangeStreamOptions = {
-          showExpandedEvents: true,
-          maxAwaitTimeMS: this.maxAwaitTimeMS,
-          fullDocument: fullDocument
-        };
-
-        /**
-         * Only one of these options can be supplied at a time.
-         */
-        if (resumeAfter) {
-          streamOptions.resumeAfter = resumeAfter;
-        } else {
-          streamOptions.startAtOperationTime = startAfter;
-        }
-
-        let stream: mongo.ChangeStream<mongo.Document>;
-        if (filters.multipleDatabases) {
-          // Requires readAnyDatabase@admin on Atlas
-          stream = this.client.watch(pipeline, streamOptions);
-        } else {
-          // Same general result, but requires less permissions than the above
-          stream = this.defaultDb.watch(pipeline, streamOptions);
-        }
+        this.logger.info(`Resume streaming at ${startAfter?.inspect()} / ${lastLsn} | Token age: ${tokenAgeSeconds}s`);
 
+        await using streamManager = this.openChangeStream({ lsn: lastCheckpointLsn });
+        const { stream, filters } = streamManager;
         if (this.abort_signal.aborted) {
-          stream.close();
+          await stream.close();
           return;
         }
 
-        this.abort_signal.addEventListener('abort', () => {
-          stream.close();
-        });
-
         // Always start with a checkpoint.
         // This helps us to clear errors when restarting, even if there is
         // no data to replicate.
-        let waitForCheckpointLsn: string | null = await createCheckpoint(this.client, this.defaultDb);
+        let waitForCheckpointLsn: string | null = await createCheckpoint(
+          this.client,
+          this.defaultDb,
+          this.checkpointStreamId
+        );
 
         let splitDocument: mongo.ChangeStreamDocument | null = null;
 
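
Besides deduplicating the stream setup, openChangeStream() is an async-disposable resource: the `await using` declarations in getSnapshotLsn(), validateSnapshotLsn() and streamChangesInternal() close the stream automatically when the scope exits. A minimal standalone version of the same pattern (a sketch against the plain `mongodb` driver, assuming TypeScript 5.2+ explicit resource management):

```ts
import { ChangeStream, Db } from 'mongodb';

// Wrap the stream so `await using` closes it when the enclosing scope
// exits, including on error paths.
function openDisposableStream(db: Db) {
  const stream: ChangeStream = db.watch([], { showExpandedEvents: true });
  return {
    stream,
    [Symbol.asyncDispose]: async () => {
      await stream.close();
    }
  };
}

// Usage: the stream is closed as soon as this block exits.
async function example(db: Db) {
  await using managed = openDisposableStream(db);
  const change = await managed.stream.tryNext();
  console.log(change?.operationType);
}
```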
@@ -638,10 +828,16 @@ export class ChangeStream {
          // We add an additional check for waitForCheckpointLsn == null, to make sure we're not
          // doing a keepalive in the middle of a transaction.
          if (waitForCheckpointLsn == null && performance.now() - lastEmptyResume > 60_000) {
-            const { comparable: lsn } = MongoLSN.fromResumeToken(stream.resumeToken);
+            const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(stream.resumeToken);
            await batch.keepalive(lsn);
            await touch();
            lastEmptyResume = performance.now();
+            // Log the token update. This helps as a general "replication is still active" message in the logs.
+            // This token would typically be around 10s behind.
+            this.logger.info(
+              `Idle change stream. Persisted resumeToken for ${timestampToDate(timestamp).toISOString()}`
+            );
+            this.isStartingReplication = false;
          }
          continue;
        }
@@ -694,19 +890,15 @@ export class ChangeStream {
 
          if (!flexDbNameWorkaroundLogged) {
            flexDbNameWorkaroundLogged = true;
-            logger.warn(
-              `${this.logPrefix} Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.`
+            this.logger.warn(
+              `Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.`
            );
          }
        }
 
-        if (
-          (changeDocument.operationType == 'insert' ||
-            changeDocument.operationType == 'update' ||
-            changeDocument.operationType == 'replace' ||
-            changeDocument.operationType == 'drop') &&
-          changeDocument.ns.coll == CHECKPOINTS_COLLECTION
-        ) {
+        const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
+
+        if (ns?.coll == CHECKPOINTS_COLLECTION) {
          /**
           * Dropping the database does not provide an `invalidate` event.
           * We typically would receive `drop` events for the collection which we
@@ -727,6 +919,29 @@ export class ChangeStream {
          );
        }
 
+        if (
+          !(
+            changeDocument.operationType == 'insert' ||
+            changeDocument.operationType == 'update' ||
+            changeDocument.operationType == 'replace'
+          )
+        ) {
+          continue;
+        }
+
+        // We handle two types of checkpoint events:
+        // 1. "Standalone" checkpoints, typically write checkpoints. We want to process these
+        //    immediately, regardless of where they were created.
+        // 2. "Batch" checkpoints for the current stream. This is used as a form of dynamic rate
+        //    limiting of commits, so we specifically want to exclude checkpoints from other streams.
+        //
+        // It may be useful to also throttle commits due to standalone checkpoints in the future.
+        // However, these typically have a much lower rate than batch checkpoints, so we don't do that for now.
+
+        const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId;
+        if (!(checkpointId == STANDALONE_CHECKPOINT_ID || this.checkpointStreamId.equals(checkpointId))) {
+          continue;
+        }
          const { comparable: lsn } = new MongoLSN({
            timestamp: changeDocument.clusterTime!,
            resume_token: changeDocument._id
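
The comment in the hunk above distinguishes the two checkpoint flavours; the acceptance rule itself reduces to a small predicate, shown here in isolation for clarity (STANDALONE_CHECKPOINT_ID comes from MongoRelation.js in this release):

```ts
import { ObjectId } from 'mongodb';

// Accept standalone (write) checkpoints from anywhere, but batch
// checkpoints only when they were created by this stream instance.
function shouldProcessCheckpoint(
  checkpointId: string | ObjectId,
  ownStreamId: ObjectId,
  standaloneCheckpointId: string
): boolean {
  return checkpointId == standaloneCheckpointId || ownStreamId.equals(checkpointId);
}
```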
@@ -735,7 +950,12 @@ export class ChangeStream {
          if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) {
            waitForCheckpointLsn = null;
          }
-          await batch.commit(lsn);
+          const didCommit = await batch.commit(lsn, { oldestUncommittedChange: this.oldestUncommittedChange });
+
+          if (didCommit) {
+            this.oldestUncommittedChange = null;
+            this.isStartingReplication = false;
+          }
        } else if (
          changeDocument.operationType == 'insert' ||
          changeDocument.operationType == 'update' ||
@@ -743,7 +963,7 @@ export class ChangeStream {
          changeDocument.operationType == 'delete'
        ) {
          if (waitForCheckpointLsn == null) {
-            waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb);
+            waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
          }
          const rel = getMongoRelation(changeDocument.ns);
          const table = await this.getRelation(batch, rel, {
@@ -754,6 +974,9 @@ export class ChangeStream {
            snapshot: true
          });
          if (table.syncAny) {
+            if (this.oldestUncommittedChange == null && changeDocument.clusterTime != null) {
+              this.oldestUncommittedChange = timestampToDate(changeDocument.clusterTime);
+            }
            await this.writeChange(batch, table, changeDocument);
          }
        } else if (changeDocument.operationType == 'drop') {
@@ -764,7 +987,7 @@ export class ChangeStream {
          });
          if (table.syncAny) {
            await batch.drop([table]);
-            this.
+            this.relationCache.delete(table);
          }
        } else if (changeDocument.operationType == 'rename') {
          const relFrom = getMongoRelation(changeDocument.ns);
@@ -775,7 +998,7 @@ export class ChangeStream {
          });
          if (tableFrom.syncAny) {
            await batch.drop([tableFrom]);
-            this.
+            this.relationCache.delete(relFrom);
          }
          // Here we do need to snapshot the new table
          const collection = await this.getCollectionInfo(relTo.schema, relTo.name);
@@ -789,6 +1012,19 @@ export class ChangeStream {
      }
    );
  }
+
+  async getReplicationLagMillis(): Promise<number | undefined> {
+    if (this.oldestUncommittedChange == null) {
+      if (this.isStartingReplication) {
+        // We don't have anything to compute replication lag with yet.
+        return undefined;
+      } else {
+        // We don't have any uncommitted changes, so replication is up-to-date.
+        return 0;
+      }
+    }
+    return Date.now() - this.oldestUncommittedChange.getTime();
+  }
 }
 
 async function touch() {