@powersync/service-module-mssql 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/LICENSE +67 -0
  2. package/README.md +3 -0
  3. package/ci/init-mssql.sql +50 -0
  4. package/dist/api/MSSQLRouteAPIAdapter.d.ts +21 -0
  5. package/dist/api/MSSQLRouteAPIAdapter.js +248 -0
  6. package/dist/api/MSSQLRouteAPIAdapter.js.map +1 -0
  7. package/dist/common/LSN.d.ts +37 -0
  8. package/dist/common/LSN.js +64 -0
  9. package/dist/common/LSN.js.map +1 -0
  10. package/dist/common/MSSQLSourceTable.d.ts +27 -0
  11. package/dist/common/MSSQLSourceTable.js +35 -0
  12. package/dist/common/MSSQLSourceTable.js.map +1 -0
  13. package/dist/common/MSSQLSourceTableCache.d.ts +14 -0
  14. package/dist/common/MSSQLSourceTableCache.js +28 -0
  15. package/dist/common/MSSQLSourceTableCache.js.map +1 -0
  16. package/dist/common/mssqls-to-sqlite.d.ts +18 -0
  17. package/dist/common/mssqls-to-sqlite.js +143 -0
  18. package/dist/common/mssqls-to-sqlite.js.map +1 -0
  19. package/dist/index.d.ts +1 -0
  20. package/dist/index.js +2 -0
  21. package/dist/index.js.map +1 -0
  22. package/dist/module/MSSQLModule.d.ts +15 -0
  23. package/dist/module/MSSQLModule.js +68 -0
  24. package/dist/module/MSSQLModule.js.map +1 -0
  25. package/dist/replication/CDCPoller.d.ts +67 -0
  26. package/dist/replication/CDCPoller.js +183 -0
  27. package/dist/replication/CDCPoller.js.map +1 -0
  28. package/dist/replication/CDCReplicationJob.d.ts +17 -0
  29. package/dist/replication/CDCReplicationJob.js +76 -0
  30. package/dist/replication/CDCReplicationJob.js.map +1 -0
  31. package/dist/replication/CDCReplicator.d.ts +18 -0
  32. package/dist/replication/CDCReplicator.js +55 -0
  33. package/dist/replication/CDCReplicator.js.map +1 -0
  34. package/dist/replication/CDCStream.d.ts +106 -0
  35. package/dist/replication/CDCStream.js +536 -0
  36. package/dist/replication/CDCStream.js.map +1 -0
  37. package/dist/replication/MSSQLConnectionManager.d.ts +23 -0
  38. package/dist/replication/MSSQLConnectionManager.js +97 -0
  39. package/dist/replication/MSSQLConnectionManager.js.map +1 -0
  40. package/dist/replication/MSSQLConnectionManagerFactory.d.ts +10 -0
  41. package/dist/replication/MSSQLConnectionManagerFactory.js +28 -0
  42. package/dist/replication/MSSQLConnectionManagerFactory.js.map +1 -0
  43. package/dist/replication/MSSQLErrorRateLimiter.d.ts +10 -0
  44. package/dist/replication/MSSQLErrorRateLimiter.js +34 -0
  45. package/dist/replication/MSSQLErrorRateLimiter.js.map +1 -0
  46. package/dist/replication/MSSQLSnapshotQuery.d.ts +71 -0
  47. package/dist/replication/MSSQLSnapshotQuery.js +190 -0
  48. package/dist/replication/MSSQLSnapshotQuery.js.map +1 -0
  49. package/dist/types/mssql-data-types.d.ts +66 -0
  50. package/dist/types/mssql-data-types.js +62 -0
  51. package/dist/types/mssql-data-types.js.map +1 -0
  52. package/dist/types/types.d.ts +177 -0
  53. package/dist/types/types.js +141 -0
  54. package/dist/types/types.js.map +1 -0
  55. package/dist/utils/mssql.d.ts +80 -0
  56. package/dist/utils/mssql.js +329 -0
  57. package/dist/utils/mssql.js.map +1 -0
  58. package/dist/utils/schema.d.ts +21 -0
  59. package/dist/utils/schema.js +131 -0
  60. package/dist/utils/schema.js.map +1 -0
  61. package/package.json +51 -0
  62. package/src/api/MSSQLRouteAPIAdapter.ts +283 -0
  63. package/src/common/LSN.ts +77 -0
  64. package/src/common/MSSQLSourceTable.ts +54 -0
  65. package/src/common/MSSQLSourceTableCache.ts +36 -0
  66. package/src/common/mssqls-to-sqlite.ts +151 -0
  67. package/src/index.ts +1 -0
  68. package/src/module/MSSQLModule.ts +82 -0
  69. package/src/replication/CDCPoller.ts +241 -0
  70. package/src/replication/CDCReplicationJob.ts +87 -0
  71. package/src/replication/CDCReplicator.ts +70 -0
  72. package/src/replication/CDCStream.ts +688 -0
  73. package/src/replication/MSSQLConnectionManager.ts +113 -0
  74. package/src/replication/MSSQLConnectionManagerFactory.ts +33 -0
  75. package/src/replication/MSSQLErrorRateLimiter.ts +36 -0
  76. package/src/replication/MSSQLSnapshotQuery.ts +230 -0
  77. package/src/types/mssql-data-types.ts +79 -0
  78. package/src/types/types.ts +224 -0
  79. package/src/utils/mssql.ts +420 -0
  80. package/src/utils/schema.ts +172 -0
  81. package/test/src/CDCStream.test.ts +206 -0
  82. package/test/src/CDCStreamTestContext.ts +212 -0
  83. package/test/src/CDCStream_resumable_snapshot.test.ts +152 -0
  84. package/test/src/env.ts +11 -0
  85. package/test/src/mssql-to-sqlite.test.ts +474 -0
  86. package/test/src/setup.ts +12 -0
  87. package/test/src/util.ts +189 -0
  88. package/test/tsconfig.json +28 -0
  89. package/test/tsconfig.tsbuildinfo +1 -0
  90. package/tsconfig.json +26 -0
  91. package/tsconfig.tsbuildinfo +1 -0
  92. package/vitest.config.ts +15 -0
@@ -0,0 +1,688 @@
1
+ import {
2
+ container,
3
+ DatabaseConnectionError,
4
+ ErrorCode,
5
+ Logger,
6
+ logger as defaultLogger,
7
+ ReplicationAbortedError,
8
+ ReplicationAssertionError,
9
+ ServiceAssertionError
10
+ } from '@powersync/lib-services-framework';
11
+ import { getUuidReplicaIdentityBson, MetricsEngine, SourceEntityDescriptor, storage } from '@powersync/service-core';
12
+
13
+ import { SqliteInputRow, SqliteRow, SqlSyncRules, TablePattern } from '@powersync/service-sync-rules';
14
+
15
+ import { ReplicationMetric } from '@powersync/service-types';
16
+ import {
17
+ BatchedSnapshotQuery,
18
+ IdSnapshotQuery,
19
+ MSSQLSnapshotQuery,
20
+ PrimaryKeyValue,
21
+ SimpleSnapshotQuery
22
+ } from './MSSQLSnapshotQuery.js';
23
+ import { MSSQLConnectionManager } from './MSSQLConnectionManager.js';
24
+ import { getReplicationIdentityColumns, getTablesFromPattern, ResolvedTable } from '../utils/schema.js';
25
+ import {
26
+ checkSourceConfiguration,
27
+ createCheckpoint,
28
+ getCaptureInstance,
29
+ getLatestLSN,
30
+ getLatestReplicatedLSN,
31
+ isIColumnMetadata,
32
+ isTableEnabledForCDC,
33
+ isWithinRetentionThreshold,
34
+ toQualifiedTableName
35
+ } from '../utils/mssql.js';
36
+ import sql from 'mssql';
37
+ import { CDCToSqliteRow, toSqliteInputRow } from '../common/mssqls-to-sqlite.js';
38
+ import { LSN } from '../common/LSN.js';
39
+ import { MSSQLSourceTable } from '../common/MSSQLSourceTable.js';
40
+ import { MSSQLSourceTableCache } from '../common/MSSQLSourceTableCache.js';
41
+ import { CDCEventHandler, CDCPoller } from './CDCPoller.js';
42
+ import { CDCPollingOptions } from '../types/types.js';
43
+
44
+ export interface CDCStreamOptions {
45
+ connections: MSSQLConnectionManager;
46
+ storage: storage.SyncRulesBucketStorage;
47
+ metrics: MetricsEngine;
48
+ abortSignal: AbortSignal;
49
+ logger?: Logger;
50
+ /**
51
+ * Override snapshot batch size for testing.
52
+ * Defaults to 10_000.
53
+ * Note that queries are streamed, so we don't keep that much data in memory.
54
+ */
55
+ snapshotBatchSize?: number;
56
+
57
+ pollingOptions: CDCPollingOptions;
58
+ }
59
+
60
+ export enum SnapshotStatus {
61
+ IN_PROGRESS = 'in-progress',
62
+ DONE = 'done',
63
+ RESTART_REQUIRED = 'restart-required'
64
+ }
65
+
66
+ export interface SnapshotStatusResult {
67
+ status: SnapshotStatus;
68
+ snapshotLSN: string | null;
69
+ }
70
+
71
+ export class CDCConfigurationError extends Error {
72
+ constructor(message: string) {
73
+ super(message);
74
+ }
75
+ }
76
+
77
+ /**
78
+ * Thrown when required updates in the CDC instance tables are no longer available
79
+ *
80
+ * Possible reasons:
81
+ * * Older data has been cleaned up due to exceeding the retention period.
82
+ * This can happen if PowerSync was stopped for a long period of time.
83
+ */
84
+ export class CDCDataExpiredError extends DatabaseConnectionError {
85
+ constructor(message: string, cause: any) {
86
+ super(ErrorCode.PSYNC_S1500, message, cause);
87
+ }
88
+ }
89
+
90
+ export class CDCStream {
91
+ private readonly syncRules: SqlSyncRules;
92
+ private readonly storage: storage.SyncRulesBucketStorage;
93
+ private readonly connections: MSSQLConnectionManager;
94
+ private readonly abortSignal: AbortSignal;
95
+ private readonly logger: Logger;
96
+
97
+ private tableCache = new MSSQLSourceTableCache();
98
+
99
+ /**
100
+ * Time of the oldest uncommitted change, according to the source db.
101
+ * This is used to determine the replication lag.
102
+ */
103
+ private oldestUncommittedChange: Date | null = null;
104
+ /**
105
+ * Keep track of whether we have done a commit or keepalive yet.
106
+ * We can only compute replication lag if isStartingReplication == false, or oldestUncommittedChange is present.
107
+ */
108
+ public isStartingReplication = true;
109
+
110
+ constructor(private options: CDCStreamOptions) {
111
+ this.logger = options.logger ?? defaultLogger;
112
+ this.storage = options.storage;
113
+ this.syncRules = options.storage.getParsedSyncRules({ defaultSchema: options.connections.schema });
114
+ this.connections = options.connections;
115
+ this.abortSignal = options.abortSignal;
116
+ }
117
+
118
+ private get metrics() {
119
+ return this.options.metrics;
120
+ }
121
+
122
+ get stopped() {
123
+ return this.abortSignal.aborted;
124
+ }
125
+
126
+ get defaultSchema() {
127
+ return this.connections.schema;
128
+ }
129
+
130
+ get groupId() {
131
+ return this.options.storage.group_id;
132
+ }
133
+
134
+ get connectionId() {
135
+ const { connectionId } = this.connections;
136
+ // Default to 1 if not set
137
+ if (!connectionId) {
138
+ return 1;
139
+ }
140
+ /**
141
+ * This is often `"default"` (string) which will parse to `NaN`
142
+ */
143
+ const parsed = Number.parseInt(connectionId);
144
+ if (isNaN(parsed)) {
145
+ return 1;
146
+ }
147
+ return parsed;
148
+ }
149
+
150
+ get connectionTag() {
151
+ return this.connections.connectionTag;
152
+ }
153
+
154
+ get snapshotBatchSize() {
155
+ return this.options.snapshotBatchSize ?? 10_000;
156
+ }
157
+
158
+ async replicate() {
159
+ try {
160
+ await this.initReplication();
161
+ await this.streamChanges();
162
+ } catch (e) {
163
+ await this.storage.reportError(e);
164
+ throw e;
165
+ }
166
+ }
167
+
168
+ async populateTableCache() {
169
+ const sourceTables = this.syncRules.getSourceTables();
170
+ await this.storage.startBatch(
171
+ {
172
+ logger: this.logger,
173
+ zeroLSN: LSN.ZERO,
174
+ defaultSchema: this.defaultSchema,
175
+ storeCurrentData: true
176
+ },
177
+ async (batch) => {
178
+ for (let tablePattern of sourceTables) {
179
+ const tables = await this.getQualifiedTableNames(batch, tablePattern);
180
+ for (const table of tables) {
181
+ this.tableCache.set(table);
182
+ }
183
+ }
184
+ }
185
+ );
186
+ }
187
+
188
+ async getQualifiedTableNames(
189
+ batch: storage.BucketStorageBatch,
190
+ tablePattern: TablePattern
191
+ ): Promise<MSSQLSourceTable[]> {
192
+ if (tablePattern.connectionTag != this.connections.connectionTag) {
193
+ return [];
194
+ }
195
+
196
+ const matchedTables: ResolvedTable[] = await getTablesFromPattern(this.connections, tablePattern);
197
+
198
+ const tables: MSSQLSourceTable[] = [];
199
+ for (const matchedTable of matchedTables) {
200
+ const isEnabled = await isTableEnabledForCDC({
201
+ connectionManager: this.connections,
202
+ table: matchedTable.name,
203
+ schema: matchedTable.schema
204
+ });
205
+
206
+ if (!isEnabled) {
207
+ this.logger.info(`Skipping ${matchedTable.schema}.${matchedTable.name} - table is not enabled for CDC.`);
208
+ continue;
209
+ }
210
+
211
+ // TODO: Check RLS settings for table
212
+
213
+ const replicaIdColumns = await getReplicationIdentityColumns({
214
+ connectionManager: this.connections,
215
+ tableName: matchedTable.name,
216
+ schema: matchedTable.schema
217
+ });
218
+
219
+ const table = await this.processTable(
220
+ batch,
221
+ {
222
+ name: matchedTable.name,
223
+ schema: matchedTable.schema,
224
+ objectId: matchedTable.objectId,
225
+ replicaIdColumns: replicaIdColumns.columns
226
+ },
227
+ false
228
+ );
229
+
230
+ tables.push(table);
231
+ }
232
+ return tables;
233
+ }
234
+
235
+ async processTable(
236
+ batch: storage.BucketStorageBatch,
237
+ table: SourceEntityDescriptor,
238
+ snapshot: boolean
239
+ ): Promise<MSSQLSourceTable> {
240
+ if (!table.objectId && typeof table.objectId != 'number') {
241
+ throw new ReplicationAssertionError(`objectId expected, got ${typeof table.objectId}`);
242
+ }
243
+ const resolved = await this.storage.resolveTable({
244
+ group_id: this.groupId,
245
+ connection_id: this.connectionId,
246
+ connection_tag: this.connectionTag,
247
+ entity_descriptor: table,
248
+ sync_rules: this.syncRules
249
+ });
250
+ const captureInstance = await getCaptureInstance({ connectionManager: this.connections, tableName: resolved.table.name, schema: resolved.table.schema });
251
+ if (!captureInstance) {
252
+ throw new ServiceAssertionError(
253
+ `Missing capture instance for table ${toQualifiedTableName(resolved.table.schema, resolved.table.name)}`
254
+ );
255
+ }
256
+ const resolvedTable = new MSSQLSourceTable({
257
+ sourceTable: resolved.table,
258
+ captureInstance: captureInstance
259
+ });
260
+
261
+ // Drop conflicting tables. This includes for example renamed tables.
262
+ await batch.drop(resolved.dropTables);
263
+
264
+ // Snapshot if:
265
+ // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere)
266
+ // 2. Snapshot is not already done, AND:
267
+ // 3. The table is used in sync rules.
268
+ const shouldSnapshot = snapshot && !resolved.table.snapshotComplete && resolved.table.syncAny;
269
+
270
+ if (shouldSnapshot) {
271
+ // Truncate this table in case a previous snapshot was interrupted.
272
+ await batch.truncate([resolved.table]);
273
+
274
+ // Start the snapshot inside a transaction.
275
+ try {
276
+ await this.snapshotTableInTx(batch, resolvedTable);
277
+ } finally {
278
+ // TODO Cleanup?
279
+ }
280
+ }
281
+
282
+ return resolvedTable;
283
+ }
284
+
285
+ private async snapshotTableInTx(
286
+ batch: storage.BucketStorageBatch,
287
+ table: MSSQLSourceTable,
288
+ limited?: PrimaryKeyValue[]
289
+ ): Promise<void> {
290
+ // Note: We use the "Read Committed" isolation level here, not snapshot isolation.
291
+ // The data may change during the transaction, but that is compensated for in the streaming
292
+ // replication afterward.
293
+ const transaction = await this.connections.createTransaction();
294
+ await transaction.begin(sql.ISOLATION_LEVEL.READ_COMMITTED);
295
+ try {
296
+ await this.snapshotTable(batch, transaction, table, limited);
297
+
298
+ // Get the current LSN.
299
+ // The data will only be consistent once incremental replication has passed that point.
300
+ // We have to get this LSN _after_ we have finished the table snapshot.
301
+ //
302
+ // There are basically two relevant LSNs here:
303
+ // A: PreSnapshot: The LSN before the snapshot starts.
304
+ // B: PostSnapshot: The LSN after the table snapshot is complete, which is what we get here.
305
+ // When we do the snapshot queries, the data that we get back for each batch could match the state
306
+ // anywhere between A and B. To actually have a consistent state on our side, we need to:
307
+ // 1. Complete the snapshot.
308
+ // 2. Wait until logical replication has caught up with all the changes between A and B.
309
+ // Calling `markSnapshotDone(LSN B)` covers that.
310
+ const postSnapshotLSN = await getLatestLSN(this.connections);
311
+ // Side note: A ROLLBACK would probably also be fine here, since we only read in this transaction.
312
+ await transaction.commit();
313
+ const [updatedSourceTable] = await batch.markSnapshotDone([table.sourceTable], postSnapshotLSN.toString());
314
+ this.tableCache.updateSourceTable(updatedSourceTable);
315
+ } catch (e) {
316
+ await transaction.rollback();
317
+ throw e;
318
+ }
319
+ }
320
+
321
+ private async snapshotTable(
322
+ batch: storage.BucketStorageBatch,
323
+ transaction: sql.Transaction,
324
+ table: MSSQLSourceTable,
325
+ limited?: PrimaryKeyValue[]
326
+ ) {
327
+ let totalEstimatedCount = table.sourceTable.snapshotStatus?.totalEstimatedCount;
328
+ let replicatedCount = table.sourceTable.snapshotStatus?.replicatedCount ?? 0;
329
+ let lastCountTime = 0;
330
+ let query: MSSQLSnapshotQuery;
331
+ // We do streaming on two levels:
332
+ // 1. Coarse select from the entire table, stream rows 1 by one
333
+ // 2. Fine level: Stream batches of rows with each fetch call
334
+ if (limited) {
335
+ query = new IdSnapshotQuery(transaction, table, limited);
336
+ } else if (BatchedSnapshotQuery.supports(table)) {
337
+ // Single primary key - we can use the primary key for chunking
338
+ const orderByKey = table.sourceTable.replicaIdColumns[0];
339
+ query = new BatchedSnapshotQuery(
340
+ transaction,
341
+ table,
342
+ this.snapshotBatchSize,
343
+ table.sourceTable.snapshotStatus?.lastKey ?? null
344
+ );
345
+ if (table.sourceTable.snapshotStatus?.lastKey != null) {
346
+ this.logger.info(
347
+ `Replicating ${table.toQualifiedName()} ${table.sourceTable.formatSnapshotProgress()} - resuming from ${orderByKey.name} > ${(query as BatchedSnapshotQuery).lastKey}`
348
+ );
349
+ } else {
350
+ this.logger.info(
351
+ `Replicating ${table.toQualifiedName()} ${table.sourceTable.formatSnapshotProgress()} - resumable`
352
+ );
353
+ }
354
+ } else {
355
+ // Fallback case - query the entire table
356
+ this.logger.info(
357
+ `Replicating ${table.toQualifiedName()} ${table.sourceTable.formatSnapshotProgress()} - not resumable`
358
+ );
359
+ query = new SimpleSnapshotQuery(transaction, table);
360
+ replicatedCount = 0;
361
+ }
362
+ await query.initialize();
363
+
364
+ let columns: sql.IColumnMetadata | null = null;
365
+ let hasRemainingData = true;
366
+ while (hasRemainingData) {
367
+ // Fetch 10k at a time.
368
+ // The balance here is between latency overhead per FETCH call,
369
+ // and not spending too much time on each FETCH call.
370
+ // We aim for a couple of seconds on each FETCH call.
371
+ let batchReplicatedCount = 0;
372
+ const cursor = query.next();
373
+ for await (const result of cursor) {
374
+ if (columns == null && isIColumnMetadata(result)) {
375
+ columns = result;
376
+ continue;
377
+ } else {
378
+ if (!columns) {
379
+ throw new ReplicationAssertionError(`Missing column metadata`);
380
+ }
381
+ const inputRow: SqliteInputRow = toSqliteInputRow(result, columns);
382
+ const row = this.syncRules.applyRowContext<never>(inputRow);
383
+ // This auto-flushes when the batch reaches its size limit
384
+ await batch.save({
385
+ tag: storage.SaveOperationTag.INSERT,
386
+ sourceTable: table.sourceTable,
387
+ before: undefined,
388
+ beforeReplicaId: undefined,
389
+ after: row,
390
+ afterReplicaId: getUuidReplicaIdentityBson(row, table.sourceTable.replicaIdColumns)
391
+ });
392
+
393
+ replicatedCount++;
394
+ batchReplicatedCount++;
395
+ this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1);
396
+ }
397
+
398
+ this.touch();
399
+ }
400
+
401
+ // Important: flush before marking progress
402
+ await batch.flush();
403
+ if (limited == null) {
404
+ let lastKey: Uint8Array | undefined;
405
+ if (query instanceof BatchedSnapshotQuery) {
406
+ lastKey = query.getLastKeySerialized();
407
+ }
408
+ if (lastCountTime < performance.now() - 10 * 60 * 1000) {
409
+ // Even though we're doing the snapshot inside a transaction, the transaction uses
410
+ // the default "Read Committed" isolation level. This means we can get new data
411
+ // within the transaction, so we re-estimate the count every 10 minutes when replicating
412
+ // large tables.
413
+ totalEstimatedCount = await this.estimatedCountNumber(table, transaction);
414
+ lastCountTime = performance.now();
415
+ }
416
+ const updatedSourceTable = await batch.updateTableProgress(table.sourceTable, {
417
+ lastKey: lastKey,
418
+ replicatedCount: replicatedCount,
419
+ totalEstimatedCount: totalEstimatedCount
420
+ });
421
+ this.tableCache.updateSourceTable(updatedSourceTable);
422
+
423
+ this.logger.info(`Replicating ${table.toQualifiedName()} ${table.sourceTable.formatSnapshotProgress()}`);
424
+ } else {
425
+ this.logger.info(`Replicating ${table.toQualifiedName()} ${replicatedCount}/${limited.length} for resnapshot`);
426
+ }
427
+
428
+ if (this.abortSignal.aborted) {
429
+ // We only abort after flushing
430
+ throw new ReplicationAbortedError(`Initial replication interrupted`);
431
+ }
432
+
433
+ // When the batch of rows is smaller than the requested batch size we know it is the final batch
434
+ if (batchReplicatedCount < this.snapshotBatchSize) {
435
+ hasRemainingData = false;
436
+ }
437
+ }
438
+ }
439
+
440
+ /**
441
+ * Estimate the number of rows in a table. This query uses partition stats view to get a fast estimate of the row count.
442
+ * This requires that the MSSQL DB user has the VIEW DATABASE PERFORMANCE STATE permission.
443
+ * @param table
444
+ * @param transaction
445
+ */
446
+ async estimatedCountNumber(table: MSSQLSourceTable, transaction?: sql.Transaction): Promise<number> {
447
+ const request = transaction ? transaction.request() : await this.connections.createRequest();
448
+ const { recordset: result } = await request.query(
449
+ `SELECT SUM(row_count) AS total_rows
450
+ FROM sys.dm_db_partition_stats
451
+ WHERE object_id = OBJECT_ID('${table.toQualifiedName()}')
452
+ AND index_id < 2;`
453
+ );
454
+ // TODO Fallback query in case user does not have permission?
455
+ return result[0].total_rows ?? -1;
456
+ }
457
+
458
+ /**
459
+ * Start initial replication.
460
+ *
461
+ * If (partial) replication was done before on this slot, this clears the state
462
+ * and starts again from scratch.
463
+ */
464
+ async startInitialReplication(snapshotStatus: SnapshotStatusResult) {
465
+ let { status, snapshotLSN } = snapshotStatus;
466
+
467
+ if (status === SnapshotStatus.RESTART_REQUIRED) {
468
+ this.logger.info(`Snapshot restart required, clearing state.`);
469
+ // This happens if the last replicated checkpoint LSN is no longer available in the CDC tables.
470
+ await this.storage.clear({ signal: this.abortSignal });
471
+ }
472
+
473
+ await this.storage.startBatch(
474
+ {
475
+ logger: this.logger,
476
+ zeroLSN: LSN.ZERO,
477
+ defaultSchema: this.defaultSchema,
478
+ storeCurrentData: false,
479
+ skipExistingRows: true
480
+ },
481
+ async (batch) => {
482
+ if (snapshotLSN == null) {
483
+ // First replication attempt - set the snapshot LSN to the current LSN before starting
484
+ snapshotLSN = (await getLatestReplicatedLSN(this.connections)).toString();
485
+ await batch.setResumeLsn(snapshotLSN);
486
+ const latestLSN = (await getLatestLSN(this.connections)).toString();
487
+ this.logger.info(`Marking snapshot at ${snapshotLSN}, Latest DB LSN ${latestLSN}.`);
488
+ } else {
489
+ this.logger.info(`Resuming snapshot at ${snapshotLSN}.`);
490
+ }
491
+
492
+ const tablesToSnapshot: MSSQLSourceTable[] = [];
493
+ for (const table of this.tableCache.getAll()) {
494
+ if (table.sourceTable.snapshotComplete) {
495
+ this.logger.info(`Skipping table [${table.toQualifiedName()}] - snapshot already done.`);
496
+ continue;
497
+ }
498
+
499
+ const count = await this.estimatedCountNumber(table);
500
+ const updatedSourceTable = await batch.updateTableProgress(table.sourceTable, {
501
+ totalEstimatedCount: count
502
+ });
503
+ this.tableCache.updateSourceTable(updatedSourceTable);
504
+ tablesToSnapshot.push(table);
505
+
506
+ this.logger.info(`To replicate: ${table.toQualifiedName()} ${table.sourceTable.formatSnapshotProgress()}`);
507
+ }
508
+
509
+ for (const table of tablesToSnapshot) {
510
+ await this.snapshotTableInTx(batch, table);
511
+ this.touch();
512
+ }
513
+
514
+ // This will not create a consistent checkpoint yet, but will persist the op.
515
+ // Actual checkpoint will be created when streaming replication caught up.
516
+ await batch.commit(snapshotLSN);
517
+
518
+ this.logger.info(
519
+ `Snapshot done. Need to replicate from ${snapshotLSN} to ${batch.noCheckpointBeforeLsn} to be consistent`
520
+ );
521
+ }
522
+ );
523
+ }
524
+
525
+ async initReplication() {
526
+ const errors = await checkSourceConfiguration(this.connections);
527
+ if (errors.length > 0) {
528
+ throw new CDCConfigurationError(`CDC Configuration Errors: ${errors.join(', ')}`);
529
+ }
530
+
531
+ await this.populateTableCache();
532
+ const snapshotStatus = await this.checkSnapshotStatus();
533
+ if (snapshotStatus.status !== SnapshotStatus.DONE) {
534
+ await this.startInitialReplication(snapshotStatus);
535
+ }
536
+ }
537
+
538
+ /**
539
+ * Checks if the initial sync has already been completed and if updates from the last checkpoint are still available
540
+ * in the CDC instances.
541
+ */
542
+ private async checkSnapshotStatus(): Promise<SnapshotStatusResult> {
543
+ const status = await this.storage.getStatus();
544
+ if (status.snapshot_done && status.checkpoint_lsn) {
545
+ // Snapshot is done, but we still need to check that the last known checkpoint LSN is still
546
+ // within the threshold of the CDC tables
547
+ this.logger.info(`Initial replication already done`);
548
+
549
+ const lastCheckpointLSN = LSN.fromString(status.checkpoint_lsn);
550
+ // Check that the CDC tables still have valid data
551
+ const isAvailable = await isWithinRetentionThreshold({
552
+ checkpointLSN: lastCheckpointLSN,
553
+ tables: this.tableCache.getAll(),
554
+ connectionManager: this.connections
555
+ });
556
+ if (!isAvailable) {
557
+ this.logger.warn(
558
+ `Updates from the last checkpoint are no longer available in the CDC instance, starting initial replication again.`
559
+ );
560
+ }
561
+ return { status: isAvailable ? SnapshotStatus.DONE : SnapshotStatus.RESTART_REQUIRED, snapshotLSN: null };
562
+ } else {
563
+ return { status: SnapshotStatus.IN_PROGRESS, snapshotLSN: status.snapshot_lsn };
564
+ }
565
+ }
566
+
567
+ async streamChanges() {
568
+ await this.storage.startBatch(
569
+ {
570
+ logger: this.logger,
571
+ zeroLSN: LSN.ZERO,
572
+ defaultSchema: this.defaultSchema,
573
+ storeCurrentData: false,
574
+ skipExistingRows: false
575
+ },
576
+ async (batch) => {
577
+ if (batch.resumeFromLsn == null) {
578
+ throw new ReplicationAssertionError(`No LSN found to resume replication from.`);
579
+ }
580
+ const startLSN = LSN.fromString(batch.resumeFromLsn);
581
+ const sourceTables: MSSQLSourceTable[] = this.tableCache.getAll();
582
+ const eventHandler = this.createEventHandler(batch);
583
+
584
+ const poller = new CDCPoller({
585
+ connectionManager: this.connections,
586
+ eventHandler,
587
+ sourceTables,
588
+ startLSN,
589
+ pollingOptions: this.options.pollingOptions,
590
+ logger: this.logger
591
+ });
592
+
593
+ this.abortSignal.addEventListener(
594
+ 'abort',
595
+ async () => {
596
+ await poller.stop();
597
+ },
598
+ { once: true }
599
+ );
600
+
601
+ await createCheckpoint(this.connections);
602
+
603
+ this.logger.info(`Streaming changes from: ${startLSN}`);
604
+ await poller.replicateUntilStopped();
605
+ }
606
+ );
607
+ }
608
+
609
+ private createEventHandler(batch: storage.BucketStorageBatch): CDCEventHandler {
610
+ return {
611
+ onInsert: async (row: any, table: MSSQLSourceTable, columns: sql.IColumnMetadata) => {
612
+ const afterRow = this.toSqliteRow(row, columns);
613
+ await batch.save({
614
+ tag: storage.SaveOperationTag.INSERT,
615
+ sourceTable: table.sourceTable,
616
+ before: undefined,
617
+ beforeReplicaId: undefined,
618
+ after: afterRow,
619
+ afterReplicaId: getUuidReplicaIdentityBson(afterRow, table.sourceTable.replicaIdColumns)
620
+ });
621
+ this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1);
622
+ },
623
+ onUpdate: async (rowAfter: any, rowBefore: any, table: MSSQLSourceTable, columns: sql.IColumnMetadata) => {
624
+ const beforeRow = this.toSqliteRow(rowBefore, columns);
625
+ const afterRow = this.toSqliteRow(rowAfter, columns);
626
+ await batch.save({
627
+ tag: storage.SaveOperationTag.UPDATE,
628
+ sourceTable: table.sourceTable,
629
+ before: beforeRow,
630
+ beforeReplicaId: getUuidReplicaIdentityBson(beforeRow, table.sourceTable.replicaIdColumns),
631
+ after: afterRow,
632
+ afterReplicaId: getUuidReplicaIdentityBson(afterRow, table.sourceTable.replicaIdColumns)
633
+ });
634
+ this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1);
635
+ },
636
+ onDelete: async (row: any, table: MSSQLSourceTable, columns: sql.IColumnMetadata) => {
637
+ const beforeRow = this.toSqliteRow(row, columns);
638
+ await batch.save({
639
+ tag: storage.SaveOperationTag.DELETE,
640
+ sourceTable: table.sourceTable,
641
+ before: beforeRow,
642
+ beforeReplicaId: getUuidReplicaIdentityBson(beforeRow, table.sourceTable.replicaIdColumns),
643
+ after: undefined,
644
+ afterReplicaId: undefined
645
+ });
646
+ this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1);
647
+ },
648
+ onCommit: async (lsn: string, transactionCount: number) => {
649
+ await batch.commit(lsn);
650
+ this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED).add(transactionCount);
651
+ this.isStartingReplication = false;
652
+ },
653
+ onSchemaChange: async () => {
654
+ // TODO: Handle schema changes
655
+ }
656
+ };
657
+ }
658
+
659
+ /**
660
+ * Convert CDC row data to SqliteRow format.
661
+ * CDC rows include table columns plus CDC metadata columns (__$operation, __$start_lsn, etc.).
662
+ * We filter out the CDC metadata columns.
663
+ */
664
+ private toSqliteRow(row: any, columns: sql.IColumnMetadata): SqliteRow {
665
+ const inputRow: SqliteInputRow = CDCToSqliteRow({ row, columns });
666
+
667
+ return this.syncRules.applyRowContext<never>(inputRow);
668
+ }
669
+
670
+ async getReplicationLagMillis(): Promise<number | undefined> {
671
+ if (this.oldestUncommittedChange == null) {
672
+ if (this.isStartingReplication) {
673
+ // We don't have anything to compute replication lag with yet.
674
+ return undefined;
675
+ } else {
676
+ // We don't have any uncommitted changes, so replication is up-to-date.
677
+ return 0;
678
+ }
679
+ }
680
+ return Date.now() - this.oldestUncommittedChange.getTime();
681
+ }
682
+
683
+ private touch() {
684
+ container.probes.touch().catch((e) => {
685
+ this.logger.error(`Error touching probe`, e);
686
+ });
687
+ }
688
+ }