@powersync/service-module-mongodb 0.9.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/dist/api/MongoRouteAPIAdapter.d.ts +1 -1
- package/dist/api/MongoRouteAPIAdapter.js +1 -1
- package/dist/api/MongoRouteAPIAdapter.js.map +1 -1
- package/dist/replication/ChangeStream.d.ts +26 -11
- package/dist/replication/ChangeStream.js +556 -300
- package/dist/replication/ChangeStream.js.map +1 -1
- package/dist/replication/ChangeStreamReplicationJob.d.ts +2 -0
- package/dist/replication/ChangeStreamReplicationJob.js +13 -5
- package/dist/replication/ChangeStreamReplicationJob.js.map +1 -1
- package/dist/replication/ChangeStreamReplicator.d.ts +1 -0
- package/dist/replication/ChangeStreamReplicator.js +21 -0
- package/dist/replication/ChangeStreamReplicator.js.map +1 -1
- package/dist/replication/MongoRelation.d.ts +1 -1
- package/dist/replication/MongoRelation.js +4 -0
- package/dist/replication/MongoRelation.js.map +1 -1
- package/dist/replication/MongoSnapshotQuery.d.ts +26 -0
- package/dist/replication/MongoSnapshotQuery.js +56 -0
- package/dist/replication/MongoSnapshotQuery.js.map +1 -0
- package/dist/replication/replication-utils.d.ts +2 -0
- package/dist/replication/replication-utils.js +3 -0
- package/dist/replication/replication-utils.js.map +1 -1
- package/package.json +8 -8
- package/src/api/MongoRouteAPIAdapter.ts +1 -1
- package/src/replication/ChangeStream.ts +324 -124
- package/src/replication/ChangeStreamReplicationJob.ts +14 -6
- package/src/replication/ChangeStreamReplicator.ts +23 -0
- package/src/replication/MongoRelation.ts +4 -1
- package/src/replication/MongoSnapshotQuery.ts +59 -0
- package/src/replication/replication-utils.ts +5 -0
- package/test/src/change_stream.test.ts +18 -13
- package/test/src/change_stream_utils.ts +45 -20
- package/test/src/chunked_snapshot.test.ts +153 -0
- package/test/src/resume.test.ts +7 -94
- package/test/src/resume_token.test.ts +78 -2
- package/test/src/resuming_snapshots.test.ts +138 -0
- package/test/src/slow_tests.test.ts +4 -18
- package/test/src/util.ts +12 -1
- package/tsconfig.tsbuildinfo +1 -1
package/src/replication/ChangeStream.ts
@@ -2,13 +2,21 @@ import { isMongoNetworkTimeoutError, isMongoServerError, mongo } from '@powersyn
 import {
   container,
   DatabaseConnectionError,
+  logger as defaultLogger,
   ErrorCode,
-  logger,
+  Logger,
   ReplicationAbortedError,
   ReplicationAssertionError,
   ServiceError
 } from '@powersync/lib-services-framework';
-import { MetricsEngine, SaveOperationTag, SourceEntityDescriptor, SourceTable, storage } from '@powersync/service-core';
+import {
+  MetricsEngine,
+  RelationCache,
+  SaveOperationTag,
+  SourceEntityDescriptor,
+  SourceTable,
+  storage
+} from '@powersync/service-core';
 import { DatabaseInputRow, SqliteRow, SqlSyncRules, TablePattern } from '@powersync/service-sync-rules';
 import { ReplicationMetric } from '@powersync/service-types';
 import { MongoLSN } from '../common/MongoLSN.js';
@@ -22,7 +30,8 @@ import {
   getMongoRelation,
   STANDALONE_CHECKPOINT_ID
 } from './MongoRelation.js';
-import { CHECKPOINTS_COLLECTION } from './replication-utils.js';
+import { ChunkedSnapshotQuery } from './MongoSnapshotQuery.js';
+import { CHECKPOINTS_COLLECTION, timestampToDate } from './replication-utils.js';
 
 export interface ChangeStreamOptions {
   connections: MongoManager;
@@ -36,10 +45,18 @@ export interface ChangeStreamOptions {
    * in closing the stream. To cover that case, reduce the timeout for tests.
    */
   maxAwaitTimeMS?: number;
+
+  /**
+   * Override snapshotChunkLength for testing.
+   */
+  snapshotChunkLength?: number;
+
+  logger?: Logger;
 }
 
 interface InitResult {
   needsInitialSync: boolean;
+  snapshotLsn: string | null;
 }
 
 /**
@@ -73,16 +90,32 @@ export class ChangeStream {
 
   private abort_signal: AbortSignal;
 
-  private relation_cache = new Map<string | number, storage.SourceTable>();
+  private relationCache = new RelationCache(getCacheIdentifier);
+
+  /**
+   * Time of the oldest uncommitted change, according to the source db.
+   * This is used to determine the replication lag.
+   */
+  private oldestUncommittedChange: Date | null = null;
+  /**
+   * Keep track of whether we have done a commit or keepalive yet.
+   * We can only compute replication lag if isStartingReplication == false, or oldestUncommittedChange is present.
+   */
+  private isStartingReplication = true;
 
   private checkpointStreamId = new mongo.ObjectId();
 
+  private logger: Logger;
+
+  private snapshotChunkLength: number;
+
   constructor(options: ChangeStreamOptions) {
     this.storage = options.storage;
     this.metrics = options.metrics;
     this.group_id = options.storage.group_id;
     this.connections = options.connections;
     this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 10_000;
+    this.snapshotChunkLength = options.snapshotChunkLength ?? 6_000;
     this.client = this.connections.client;
     this.defaultDb = this.connections.db;
     this.sync_rules = options.storage.getParsedSyncRules({
@@ -97,6 +130,8 @@ export class ChangeStream {
       },
       { once: true }
     );
+
+    this.logger = options.logger ?? defaultLogger;
   }
 
   get stopped() {
@@ -111,10 +146,6 @@ export class ChangeStream {
     return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE;
   }
 
-  private get logPrefix() {
-    return `[powersync_${this.group_id}]`;
-  }
-
   /**
    * This resolves a pattern, persists the related metadata, and returns
    * the resulting SourceTables.
@@ -150,7 +181,7 @@ export class ChangeStream {
       .toArray();
 
     if (!tablePattern.isWildcard && collections.length == 0) {
-      logger.warn(`${this.logPrefix} Collection ${schema}.${tablePattern.name} not found`);
+      this.logger.warn(`Collection ${schema}.${tablePattern.name} not found`);
     }
 
     for (let collection of collections) {
@@ -170,37 +201,26 @@ export class ChangeStream {
   async initSlot(): Promise<InitResult> {
     const status = await this.storage.getStatus();
     if (status.snapshot_done && status.checkpoint_lsn) {
-      logger.info(`${this.logPrefix} Initial replication already done`);
-      return { needsInitialSync: false };
+      this.logger.info(`Initial replication already done`);
+      return { needsInitialSync: false, snapshotLsn: null };
     }
 
-    return { needsInitialSync: true };
+    return { needsInitialSync: true, snapshotLsn: status.snapshot_lsn };
   }
 
   async estimatedCount(table: storage.SourceTable): Promise<string> {
-    const db = this.client.db(table.schema);
-    const count = await db.collection(table.table).estimatedDocumentCount();
+    const count = await this.estimatedCountNumber(table);
     return `~${count}`;
   }
 
-  /**
-   * Start initial replication.
-   *
-   * If (partial) replication was done before on this slot, this clears the state
-   * and starts again from scratch.
-   */
-  async startInitialReplication() {
-    await this.storage.clear();
-    await this.initialReplication();
+  async estimatedCountNumber(table: storage.SourceTable): Promise<number> {
+    const db = this.client.db(table.schema);
+    return await db.collection(table.table).estimatedDocumentCount();
   }
 
-  async initialReplication() {
-    const sourceTables = this.sync_rules.getSourceTables();
-    await this.client.connect();
-
-    // We need to get the snapshot time before taking the initial snapshot.
+  private async getSnapshotLsn(): Promise<string> {
     const hello = await this.defaultDb.command({ hello: 1 });
-
+    // Basic sanity check
     if (hello.msg == 'isdbgrid') {
       throw new ServiceError(
         ErrorCode.PSYNC_S1341,
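
Splitting `estimatedCount()` from the new `estimatedCountNumber()` lets the same value feed both the human-readable `~${count}` string and the numeric `totalEstimatedCount` snapshot progress used later in this diff. The `~` prefix is honest: `estimatedDocumentCount()` reads collection metadata instead of scanning. A small driver-level comparison, with illustrative database and collection names:

```ts
import { MongoClient } from 'mongodb';

// estimatedDocumentCount() uses collection metadata (fast, approximate),
// while countDocuments() runs an aggregation (exact, but scans).
// The snapshot progress totals in this diff use the estimated variant,
// hence the `~` prefix in estimatedCount().
async function compareCounts(client: MongoClient): Promise<void> {
  const collection = client.db('mydb').collection('mycoll'); // illustrative names
  const estimated = await collection.estimatedDocumentCount();
  const exact = await collection.countDocuments({});
  console.log(`~${estimated} (estimated) vs ${exact} (exact)`);
}
```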
@@ -211,33 +231,139 @@ export class ChangeStream {
         ErrorCode.PSYNC_S1342,
         'Standalone MongoDB instances are not supported - use a replicaset.'
       );
-    } else if (snapshotTime == null) {
-      // Not known where this would happen apart from the above cases
-      throw new ReplicationAssertionError('MongoDB lastWrite timestamp not found.');
     }
 
+    // Open a change stream just to get a resume token for later use.
+    // We could use clusterTime from the hello command, but that won't tell us if the
+    // snapshot isn't valid anymore.
+    // If we just use the first resumeToken from the stream, we get two potential issues:
+    // 1. The resumeToken may just be a wrapped clusterTime, which does not detect changes
+    //    in source db or other stream issues.
+    // 2. The first actual change we get may have the same clusterTime, causing us to incorrectly
+    //    skip that event.
+    // Instead, we create a new checkpoint document, and wait until we get that document back in the stream.
+    // To avoid potential race conditions with the checkpoint creation, we create a new checkpoint document
+    // periodically until the timeout is reached.
+
+    const LSN_TIMEOUT_SECONDS = 60;
+    const LSN_CREATE_INTERVAL_SECONDS = 1;
+
+    await using streamManager = this.openChangeStream({ lsn: null, maxAwaitTimeMs: 0 });
+    const { stream } = streamManager;
+    const startTime = performance.now();
+    let lastCheckpointCreated = -10_000;
+    let eventsSeen = 0;
+
+    while (performance.now() - startTime < LSN_TIMEOUT_SECONDS * 1000) {
+      if (performance.now() - lastCheckpointCreated >= LSN_CREATE_INTERVAL_SECONDS * 1000) {
+        await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
+        lastCheckpointCreated = performance.now();
+      }
+
+      // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream
+      const changeDocument = await stream.tryNext().catch((e) => {
+        throw mapChangeStreamError(e);
+      });
+      if (changeDocument == null) {
+        continue;
+      }
+
+      const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
+
+      if (ns?.coll == CHECKPOINTS_COLLECTION && 'documentKey' in changeDocument) {
+        const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId;
+        if (!this.checkpointStreamId.equals(checkpointId)) {
+          continue;
+        }
+        const { comparable: lsn } = new MongoLSN({
+          timestamp: changeDocument.clusterTime!,
+          resume_token: changeDocument._id
+        });
+        return lsn;
+      }
+
+      eventsSeen += 1;
+    }
+
+    // Could happen if there is a very large replication lag?
+    throw new ServiceError(
+      ErrorCode.PSYNC_S1301,
+      `Timeout while waiting for checkpoint document for ${LSN_TIMEOUT_SECONDS}s. Streamed events = ${eventsSeen}`
+    );
+  }
+
+  private async validateSnapshotLsn(lsn: string) {
+    await using streamManager = this.openChangeStream({ lsn: lsn, maxAwaitTimeMs: 0 });
+    const { stream } = streamManager;
+    try {
+      // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream
+      await stream.tryNext();
+    } catch (e) {
+      // Note: A timeout here is not handled as a ChangeStreamInvalidatedError, even though
+      // we possibly cannot recover from it.
+      throw mapChangeStreamError(e);
+    }
+  }
+
+  async initialReplication(snapshotLsn: string | null) {
+    const sourceTables = this.sync_rules.getSourceTables();
+    await this.client.connect();
+
     await this.storage.startBatch(
-      { zeroLSN: MongoLSN.ZERO.comparable, defaultSchema: this.defaultDb.databaseName, storeCurrentData: false, skipExistingRows: true },
+      {
+        logger: this.logger,
+        zeroLSN: MongoLSN.ZERO.comparable,
+        defaultSchema: this.defaultDb.databaseName,
+        storeCurrentData: false,
+        skipExistingRows: true
+      },
       async (batch) => {
+        if (snapshotLsn == null) {
+          // First replication attempt - get a snapshot and store the timestamp
+          snapshotLsn = await this.getSnapshotLsn();
+          await batch.setSnapshotLsn(snapshotLsn);
+          this.logger.info(`Marking snapshot at ${snapshotLsn}`);
+        } else {
+          this.logger.info(`Resuming snapshot at ${snapshotLsn}`);
+          // Check that the snapshot is still valid.
+          await this.validateSnapshotLsn(snapshotLsn);
+        }
+
         // Start by resolving all tables.
         // This checks postImage configuration, and that should fail as
-        // early as possible
+        // early as possible.
         let allSourceTables: SourceTable[] = [];
         for (let tablePattern of sourceTables) {
           const tables = await this.resolveQualifiedTableNames(batch, tablePattern);
           allSourceTables.push(...tables);
         }
 
+        let tablesWithStatus: SourceTable[] = [];
         for (let table of allSourceTables) {
+          if (table.snapshotComplete) {
+            this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`);
+            continue;
+          }
+          let count = await this.estimatedCountNumber(table);
+          const updated = await batch.updateTableProgress(table, {
+            totalEstimatedCount: count
+          });
+          tablesWithStatus.push(updated);
+          this.relationCache.update(updated);
+          this.logger.info(
+            `To replicate: ${table.qualifiedName}: ${updated.snapshotStatus?.replicatedCount}/~${updated.snapshotStatus?.totalEstimatedCount}`
+          );
+        }
+
+        for (let table of tablesWithStatus) {
           await this.snapshotTable(batch, table);
           await batch.markSnapshotDone([table], MongoLSN.ZERO.comparable);
 
           await touch();
         }
 
-
-
-        await batch.commit(lsn);
+        this.logger.info(`Snapshot commit at ${snapshotLsn}`);
+        await batch.commit(snapshotLsn);
       }
     );
   }
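
The `getSnapshotLsn()` hunk above replaces the old `snapshotTime` check: rather than trusting a `clusterTime`, it writes a checkpoint document and waits to see that document come back on a change stream, which proves the resume token is live and usable. The following is a minimal standalone sketch of the same technique against the official `mongodb` driver; the collection name, timeout, and marker handling are illustrative, not the package's own `createCheckpoint()` internals:

```ts
import { MongoClient, ObjectId, ResumeToken } from 'mongodb';

// Sketch: obtain a change-stream resume token that is proven usable by
// round-tripping a marker document through the stream.
async function verifiedResumeToken(client: MongoClient, dbName: string): Promise<ResumeToken> {
  const checkpoints = client.db(dbName).collection<{ _id: ObjectId; updatedAt: Date }>('_sync_checkpoints');
  const markerId = new ObjectId();
  const stream = checkpoints.watch([], { maxAwaitTimeMS: 0 });
  try {
    const deadline = Date.now() + 60_000;
    let lastWrite = 0;
    while (Date.now() < deadline) {
      if (Date.now() - lastWrite >= 1_000) {
        // Re-create the marker periodically, in case the first write raced
        // with the stream cursor actually being opened.
        await checkpoints.updateOne({ _id: markerId }, { $set: { updatedAt: new Date() } }, { upsert: true });
        lastWrite = Date.now();
      }
      // tryNext() polls without blocking on an idle stream; this busy-poll
      // mirrors the package's own loop in getSnapshotLsn().
      const event = await stream.tryNext();
      if (event == null) continue;
      if ('documentKey' in event && markerId.equals(event.documentKey._id)) {
        // This token is at or after our own write, so resuming from it
        // cannot skip changes made after the marker.
        return event._id;
      }
    }
    throw new Error('Timed out waiting for checkpoint document');
  } finally {
    await stream.close();
  }
}
```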
@@ -303,27 +429,38 @@ export class ChangeStream {
   }
 
   private async snapshotTable(batch: storage.BucketStorageBatch, table: storage.SourceTable) {
-
-
-    let at = 0;
+    const totalEstimatedCount = await this.estimatedCountNumber(table);
+    let at = table.snapshotStatus?.replicatedCount ?? 0;
     const db = this.client.db(table.schema);
     const collection = db.collection(table.table);
-
+    await using query = new ChunkedSnapshotQuery({
+      collection,
+      key: table.snapshotStatus?.lastKey,
+      batchSize: this.snapshotChunkLength
+    });
+    if (query.lastKey != null) {
+      this.logger.info(
+        `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming at _id > ${query.lastKey}`
+      );
+    } else {
+      this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
+    }
 
     let lastBatch = performance.now();
-
-
-
-
-
-
+    let nextChunkPromise = query.nextChunk();
+    while (true) {
+      const { docs: docBatch, lastKey } = await nextChunkPromise;
+      if (docBatch.length == 0) {
+        break;
+      }
+
+      if (this.abort_signal.aborted) {
+        throw new ReplicationAbortedError(`Aborted initial replication`);
+      }
+
       // Pre-fetch next batch, so that we can read and write concurrently
-
+      nextChunkPromise = query.nextChunk();
       for (let document of docBatch) {
-        if (this.abort_signal.aborted) {
-          throw new ReplicationAbortedError(`Aborted initial replication`);
-        }
-
         const record = constructAfterRecord(document);
 
         // This auto-flushes when the batch reaches its size limit
@@ -337,20 +474,27 @@ export class ChangeStream {
         });
       }
 
+      // Important: flush before marking progress
+      await batch.flush();
       at += docBatch.length;
       this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(docBatch.length);
+
+      table = await batch.updateTableProgress(table, {
+        lastKey,
+        replicatedCount: at,
+        totalEstimatedCount: totalEstimatedCount
+      });
+      this.relationCache.update(table);
+
       const duration = performance.now() - lastBatch;
       lastBatch = performance.now();
-      logger.info(
-
+      this.logger.info(
+        `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} in ${duration.toFixed(0)}ms`
       );
       await touch();
     }
     // In case the loop was interrupted, make sure we await the last promise.
-    await
-
-    await batch.flush();
-    logger.info(`${this.logPrefix} Replicated ${at} documents for ${table.qualifiedName}`);
+    await nextChunkPromise;
   }
 
   private async getRelation(
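
`ChunkedSnapshotQuery` (new in `package/src/replication/MongoSnapshotQuery.ts`) is what makes the snapshot resumable: chunks are read in `_id` order, the last `_id` of each flushed chunk is persisted through `batch.updateTableProgress()`, and after a restart the query continues from `_id > lastKey` instead of starting over. A plausible reading of the chunking itself, sketched against the plain driver (the shipped class may differ in detail):

```ts
import { Collection, Document, WithId } from 'mongodb';

// Sketch of _id-ordered chunked reads; the shipped ChunkedSnapshotQuery
// in MongoSnapshotQuery.ts may differ in detail.
class ChunkedSnapshotSketch {
  lastKey: any;

  constructor(
    private collection: Collection<Document>,
    private chunkLength: number,
    resumeAfter: any = null
  ) {
    this.lastKey = resumeAfter;
  }

  async nextChunk(): Promise<{ docs: WithId<Document>[]; lastKey: any }> {
    // A stable total order on _id is what makes the snapshot resumable:
    // every document is returned exactly once, regardless of restarts.
    const filter = this.lastKey == null ? {} : { _id: { $gt: this.lastKey } };
    const docs = await this.collection
      .find(filter)
      .sort({ _id: 1 })
      .limit(this.chunkLength)
      .toArray();
    if (docs.length > 0) {
      this.lastKey = docs[docs.length - 1]._id;
    }
    return { docs, lastKey: this.lastKey };
  }
}
```

The loop in `snapshotTable()` then overlaps I/O by starting `query.nextChunk()` for the next chunk before writing the current one, and only persists `lastKey` after `batch.flush()`, so recorded progress never points past durable data.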
@@ -358,8 +502,7 @@ export class ChangeStream {
     descriptor: SourceEntityDescriptor,
     options: { snapshot: boolean }
   ): Promise<SourceTable> {
-    const cacheId = getCacheIdentifier(descriptor);
-    const existing = this.relation_cache.get(cacheId);
+    const existing = this.relationCache.get(descriptor);
     if (existing != null) {
       return existing;
     }
@@ -399,7 +542,7 @@ export class ChangeStream {
         collMod: collectionInfo.name,
         changeStreamPreAndPostImages: { enabled: true }
       });
-      logger.info(`${this.logPrefix} Enabled postImages on ${db}.${collectionInfo.name}`);
+      this.logger.info(`Enabled postImages on ${db}.${collectionInfo.name}`);
     } else if (!enabled) {
       throw new ServiceError(ErrorCode.PSYNC_S1343, `postImages not enabled on ${db}.${collectionInfo.name}`);
     }
@@ -425,12 +568,12 @@ export class ChangeStream {
       entity_descriptor: descriptor,
       sync_rules: this.sync_rules
     });
-    this.relation_cache.set(getCacheIdentifier(descriptor), result.table);
+    this.relationCache.update(result.table);
 
     // Drop conflicting collections.
     // This is generally not expected for MongoDB source dbs, so we log an error.
     if (result.dropTables.length > 0) {
-      logger.error(
+      this.logger.error(
         `Conflicting collections found for ${JSON.stringify(descriptor)}. Dropping: ${result.dropTables.map((t) => t.id).join(', ')}`
       );
       await batch.drop(result.dropTables);
@@ -442,7 +585,7 @@ export class ChangeStream {
     // 3. The table is used in sync rules.
     const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny;
     if (shouldSnapshot) {
-      logger.info(`${this.logPrefix} New collection: ${descriptor.schema}.${descriptor.name}`);
+      this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`);
       // Truncate this table, in case a previous snapshot was interrupted.
       await batch.truncate([result.table]);
 
@@ -462,7 +605,7 @@ export class ChangeStream {
     change: mongo.ChangeStreamDocument
   ): Promise<storage.FlushedResult | null> {
     if (!table.syncAny) {
-      logger.debug(`${this.logPrefix} Collection ${table.qualifiedName} not used in sync rules - skipping`);
+      this.logger.debug(`Collection ${table.qualifiedName} not used in sync rules - skipping`);
       return null;
     }
 
@@ -525,7 +668,11 @@ export class ChangeStream {
     const result = await this.initSlot();
     await this.setupCheckpointsCollection();
     if (result.needsInitialSync) {
-      await this.startInitialReplication();
+      if (result.snapshotLsn == null) {
+        // Snapshot LSN is not present, so we need to start replication from scratch.
+        await this.storage.clear({ signal: this.abort_signal });
+      }
+      await this.initialReplication(result.snapshotLsn);
     }
   }
 
@@ -544,73 +691,99 @@ export class ChangeStream {
     }
   }
 
+  private openChangeStream(options: { lsn: string | null; maxAwaitTimeMs?: number }) {
+    const lastLsn = options.lsn ? MongoLSN.fromSerialized(options.lsn) : null;
+    const startAfter = lastLsn?.timestamp;
+    const resumeAfter = lastLsn?.resumeToken;
+
+    const filters = this.getSourceNamespaceFilters();
+
+    const pipeline: mongo.Document[] = [
+      {
+        $match: filters.$match
+      },
+      { $changeStreamSplitLargeEvent: {} }
+    ];
+
+    let fullDocument: 'required' | 'updateLookup';
+
+    if (this.usePostImages) {
+      // 'read_only' or 'auto_configure'
+      // Configuration happens during snapshot, or when we see new
+      // collections.
+      fullDocument = 'required';
+    } else {
+      fullDocument = 'updateLookup';
+    }
+
+    const streamOptions: mongo.ChangeStreamOptions = {
+      showExpandedEvents: true,
+      maxAwaitTimeMS: options.maxAwaitTimeMs ?? this.maxAwaitTimeMS,
+      fullDocument: fullDocument
+    };
+
+    /**
+     * Only one of these options can be supplied at a time.
+     */
+    if (resumeAfter) {
+      streamOptions.resumeAfter = resumeAfter;
+    } else {
+      // Legacy: We don't persist lsns without resumeTokens anymore, but we do still handle the
+      // case if we have an old one.
+      streamOptions.startAtOperationTime = startAfter;
+    }
+
+    let stream: mongo.ChangeStream<mongo.Document>;
+    if (filters.multipleDatabases) {
+      // Requires readAnyDatabase@admin on Atlas
+      stream = this.client.watch(pipeline, streamOptions);
+    } else {
+      // Same general result, but requires less permissions than the above
+      stream = this.defaultDb.watch(pipeline, streamOptions);
+    }
+
+    this.abort_signal.addEventListener('abort', () => {
+      stream.close();
+    });
+
+    return {
+      stream,
+      filters,
+      [Symbol.asyncDispose]: async () => {
+        return stream.close();
+      }
+    };
+  }
+
   async streamChangesInternal() {
     // Auto-activate as soon as initial replication is done
     await this.storage.autoActivate();
 
     await this.storage.startBatch(
-      { zeroLSN: MongoLSN.ZERO.comparable, defaultSchema: this.defaultDb.databaseName, storeCurrentData: false },
+      {
+        logger: this.logger,
+        zeroLSN: MongoLSN.ZERO.comparable,
+        defaultSchema: this.defaultDb.databaseName,
+        storeCurrentData: false
+      },
       async (batch) => {
         const { lastCheckpointLsn } = batch;
-        const lastLsn = lastCheckpointLsn ? MongoLSN.fromSerialized(lastCheckpointLsn) : null;
+        const lastLsn = MongoLSN.fromSerialized(lastCheckpointLsn!);
         const startAfter = lastLsn?.timestamp;
-        const resumeAfter = lastLsn?.resumeToken;
-
-        logger.info(`${this.logPrefix} Resume streaming at ${startAfter?.inspect()} / ${lastLsn}`);
-
-        const filters = this.getSourceNamespaceFilters();
-
-        const pipeline: mongo.Document[] = [
-          {
-            $match: filters.$match
-          },
-          { $changeStreamSplitLargeEvent: {} }
-        ];
-
-        let fullDocument: 'required' | 'updateLookup';
-
-        if (this.usePostImages) {
-          // 'read_only' or 'auto_configure'
-          // Configuration happens during snapshot, or when we see new
-          // collections.
-          fullDocument = 'required';
-        } else {
-          fullDocument = 'updateLookup';
-        }
 
-        const streamOptions: mongo.ChangeStreamOptions = {
-          showExpandedEvents: true,
-          maxAwaitTimeMS: this.maxAwaitTimeMS,
-          fullDocument: fullDocument
-        };
-
-        /**
-         * Only one of these options can be supplied at a time.
-         */
-        if (resumeAfter) {
-          streamOptions.resumeAfter = resumeAfter;
-        } else {
-          streamOptions.startAtOperationTime = startAfter;
-        }
+        // It is normal for this to be a minute or two old when there is a low volume
+        // of ChangeStream events.
+        const tokenAgeSeconds = Math.round((Date.now() - timestampToDate(startAfter).getTime()) / 1000);
 
-        let stream: mongo.ChangeStream<mongo.Document>;
-        if (filters.multipleDatabases) {
-          // Requires readAnyDatabase@admin on Atlas
-          stream = this.client.watch(pipeline, streamOptions);
-        } else {
-          // Same general result, but requires less permissions than the above
-          stream = this.defaultDb.watch(pipeline, streamOptions);
-        }
+        this.logger.info(`Resume streaming at ${startAfter?.inspect()} / ${lastLsn} | Token age: ${tokenAgeSeconds}s`);
 
+        await using streamManager = this.openChangeStream({ lsn: lastCheckpointLsn });
+        const { stream, filters } = streamManager;
         if (this.abort_signal.aborted) {
-          stream.close();
+          await stream.close();
           return;
         }
 
-        this.abort_signal.addEventListener('abort', () => {
-          stream.close();
-        });
-
         // Always start with a checkpoint.
         // This helps us to clear errors when restarting, even if there is
         // no data to replicate.
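
`openChangeStream()` consolidates stream construction that was previously inlined in `streamChangesInternal()`, and returns a handle with a `Symbol.asyncDispose` method so call sites can write `await using streamManager = this.openChangeStream(...)` and have the stream closed on every exit path. A self-contained illustration of that disposal pattern (TypeScript 5.2+ with `"lib": ["esnext", "esnext.disposable"]`, and a runtime or polyfill providing `Symbol.asyncDispose`, e.g. Node 20+):

```ts
// Illustration of the `await using` pattern used with openChangeStream().
function openResource(name: string) {
  console.log(`open ${name}`);
  return {
    name,
    [Symbol.asyncDispose]: async () => {
      // Runs when the enclosing scope exits: normal completion,
      // early return, or a thrown error.
      console.log(`close ${name}`);
    }
  };
}

async function main() {
  await using resource = openResource('change-stream');
  console.log(`streaming from ${resource.name}`);
  // Even if this line threw, `close change-stream` would still be printed.
}

void main();
```

Note the helper still registers an `abort` listener that closes the stream, since disposal only runs once control leaves the `await using` scope.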
@@ -655,10 +828,16 @@ export class ChangeStream {
         // We add an additional check for waitForCheckpointLsn == null, to make sure we're not
         // doing a keepalive in the middle of a transaction.
         if (waitForCheckpointLsn == null && performance.now() - lastEmptyResume > 60_000) {
-          const { comparable: lsn } = MongoLSN.fromResumeToken(stream.resumeToken);
+          const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(stream.resumeToken);
           await batch.keepalive(lsn);
           await touch();
           lastEmptyResume = performance.now();
+          // Log the token update. This helps as a general "replication is still active" message in the logs.
+          // This token would typically be around 10s behind.
+          this.logger.info(
+            `Idle change stream. Persisted resumeToken for ${timestampToDate(timestamp).toISOString()}`
+          );
+          this.isStartingReplication = false;
         }
         continue;
       }
@@ -711,8 +890,8 @@ export class ChangeStream {
 
           if (!flexDbNameWorkaroundLogged) {
             flexDbNameWorkaroundLogged = true;
-            logger.warn(
-              `${this.logPrefix} Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.`
+            this.logger.warn(
+              `Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.`
             );
           }
         }
@@ -771,7 +950,12 @@ export class ChangeStream {
           if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) {
             waitForCheckpointLsn = null;
           }
-          await batch.commit(lsn);
+          const didCommit = await batch.commit(lsn, { oldestUncommittedChange: this.oldestUncommittedChange });
+
+          if (didCommit) {
+            this.oldestUncommittedChange = null;
+            this.isStartingReplication = false;
+          }
         } else if (
           changeDocument.operationType == 'insert' ||
           changeDocument.operationType == 'update' ||
@@ -790,6 +974,9 @@ export class ChangeStream {
             snapshot: true
           });
           if (table.syncAny) {
+            if (this.oldestUncommittedChange == null && changeDocument.clusterTime != null) {
+              this.oldestUncommittedChange = timestampToDate(changeDocument.clusterTime);
+            }
             await this.writeChange(batch, table, changeDocument);
           }
         } else if (changeDocument.operationType == 'drop') {
@@ -800,7 +987,7 @@ export class ChangeStream {
           });
           if (table.syncAny) {
             await batch.drop([table]);
-            this.relation_cache.delete(getCacheIdentifier(rel));
+            this.relationCache.delete(table);
           }
         } else if (changeDocument.operationType == 'rename') {
           const relFrom = getMongoRelation(changeDocument.ns);
@@ -811,7 +998,7 @@ export class ChangeStream {
           });
           if (tableFrom.syncAny) {
             await batch.drop([tableFrom]);
-            this.relation_cache.delete(getCacheIdentifier(relFrom));
+            this.relationCache.delete(relFrom);
           }
           // Here we do need to snapshot the new table
           const collection = await this.getCollectionInfo(relTo.schema, relTo.name);
@@ -825,6 +1012,19 @@ export class ChangeStream {
       }
     );
   }
+
+  async getReplicationLagMillis(): Promise<number | undefined> {
+    if (this.oldestUncommittedChange == null) {
+      if (this.isStartingReplication) {
+        // We don't have anything to compute replication lag with yet.
+        return undefined;
+      } else {
+        // We don't have any uncommitted changes, so replication is up-to-date.
+        return 0;
+      }
+    }
+    return Date.now() - this.oldestUncommittedChange.getTime();
+  }
 }
 
 async function touch() {