@powersync/service-module-mongodb-storage 0.15.4 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. package/CHANGELOG.md +69 -0
  2. package/dist/migrations/db/migrations/1688556755264-initial-sync-rules.js +1 -1
  3. package/dist/migrations/db/migrations/1688556755264-initial-sync-rules.js.map +1 -1
  4. package/dist/migrations/db/migrations/1702295701188-sync-rule-state.js +2 -2
  5. package/dist/migrations/db/migrations/1702295701188-sync-rule-state.js.map +1 -1
  6. package/dist/storage/MongoBucketStorage.d.ts +8 -6
  7. package/dist/storage/MongoBucketStorage.js +153 -66
  8. package/dist/storage/MongoBucketStorage.js.map +1 -1
  9. package/dist/storage/implementation/BucketDefinitionMapping.d.ts +15 -0
  10. package/dist/storage/implementation/BucketDefinitionMapping.js +58 -0
  11. package/dist/storage/implementation/BucketDefinitionMapping.js.map +1 -0
  12. package/dist/storage/implementation/CheckpointState.d.ts +20 -0
  13. package/dist/storage/implementation/CheckpointState.js +31 -0
  14. package/dist/storage/implementation/CheckpointState.js.map +1 -0
  15. package/dist/storage/implementation/MongoBucketBatch.d.ts +48 -35
  16. package/dist/storage/implementation/MongoBucketBatch.js +118 -379
  17. package/dist/storage/implementation/MongoBucketBatch.js.map +1 -1
  18. package/dist/storage/implementation/MongoBucketBatchShared.d.ts +5 -0
  19. package/dist/storage/implementation/MongoBucketBatchShared.js +8 -0
  20. package/dist/storage/implementation/MongoBucketBatchShared.js.map +1 -0
  21. package/dist/storage/implementation/MongoChecksums.d.ts +29 -17
  22. package/dist/storage/implementation/MongoChecksums.js +13 -72
  23. package/dist/storage/implementation/MongoChecksums.js.map +1 -1
  24. package/dist/storage/implementation/MongoCompactor.d.ts +98 -58
  25. package/dist/storage/implementation/MongoCompactor.js +229 -296
  26. package/dist/storage/implementation/MongoCompactor.js.map +1 -1
  27. package/dist/storage/implementation/MongoParameterCompactor.d.ts +11 -6
  28. package/dist/storage/implementation/MongoParameterCompactor.js +11 -8
  29. package/dist/storage/implementation/MongoParameterCompactor.js.map +1 -1
  30. package/dist/storage/implementation/MongoPersistedSyncRules.d.ts +14 -0
  31. package/dist/storage/implementation/MongoPersistedSyncRules.js +67 -0
  32. package/dist/storage/implementation/MongoPersistedSyncRules.js.map +1 -0
  33. package/dist/storage/implementation/MongoPersistedSyncRulesContent.d.ts +22 -5
  34. package/dist/storage/implementation/MongoPersistedSyncRulesContent.js +56 -13
  35. package/dist/storage/implementation/MongoPersistedSyncRulesContent.js.map +1 -1
  36. package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +61 -32
  37. package/dist/storage/implementation/MongoSyncBucketStorage.js +85 -523
  38. package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
  39. package/dist/storage/implementation/MongoSyncRulesLock.d.ts +10 -4
  40. package/dist/storage/implementation/MongoSyncRulesLock.js +19 -13
  41. package/dist/storage/implementation/MongoSyncRulesLock.js.map +1 -1
  42. package/dist/storage/implementation/MongoWriteCheckpointAPI.js +1 -1
  43. package/dist/storage/implementation/MongoWriteCheckpointAPI.js.map +1 -1
  44. package/dist/storage/implementation/OperationBatch.js +1 -1
  45. package/dist/storage/implementation/SyncRuleStateUpdate.d.ts +14 -0
  46. package/dist/storage/implementation/SyncRuleStateUpdate.js +36 -0
  47. package/dist/storage/implementation/SyncRuleStateUpdate.js.map +1 -0
  48. package/dist/storage/implementation/common/BucketDataDoc.d.ts +35 -0
  49. package/dist/storage/implementation/common/BucketDataDoc.js +2 -0
  50. package/dist/storage/implementation/common/BucketDataDoc.js.map +1 -0
  51. package/dist/storage/implementation/common/MongoSyncBucketStorageContext.d.ts +13 -0
  52. package/dist/storage/implementation/common/MongoSyncBucketStorageContext.js +2 -0
  53. package/dist/storage/implementation/common/MongoSyncBucketStorageContext.js.map +1 -0
  54. package/dist/storage/implementation/common/PersistedBatch.d.ts +108 -0
  55. package/dist/storage/implementation/common/PersistedBatch.js +237 -0
  56. package/dist/storage/implementation/common/PersistedBatch.js.map +1 -0
  57. package/dist/storage/implementation/common/SingleBucketStore.d.ts +54 -0
  58. package/dist/storage/implementation/common/SingleBucketStore.js +3 -0
  59. package/dist/storage/implementation/common/SingleBucketStore.js.map +1 -0
  60. package/dist/storage/implementation/common/SourceRecordStore.d.ts +35 -0
  61. package/dist/storage/implementation/common/SourceRecordStore.js +2 -0
  62. package/dist/storage/implementation/common/SourceRecordStore.js.map +1 -0
  63. package/dist/storage/implementation/common/VersionedPowerSyncMongoBase.d.ts +27 -0
  64. package/dist/storage/implementation/common/VersionedPowerSyncMongoBase.js +57 -0
  65. package/dist/storage/implementation/common/VersionedPowerSyncMongoBase.js.map +1 -0
  66. package/dist/storage/implementation/createMongoSyncBucketStorage.d.ts +7 -0
  67. package/dist/storage/implementation/createMongoSyncBucketStorage.js +9 -0
  68. package/dist/storage/implementation/createMongoSyncBucketStorage.js.map +1 -0
  69. package/dist/storage/implementation/db.d.ts +41 -36
  70. package/dist/storage/implementation/db.js +77 -99
  71. package/dist/storage/implementation/db.js.map +1 -1
  72. package/dist/storage/implementation/models.d.ts +79 -66
  73. package/dist/storage/implementation/models.js +20 -1
  74. package/dist/storage/implementation/models.js.map +1 -1
  75. package/dist/storage/implementation/v1/MongoBucketBatchV1.d.ts +27 -0
  76. package/dist/storage/implementation/v1/MongoBucketBatchV1.js +407 -0
  77. package/dist/storage/implementation/v1/MongoBucketBatchV1.js.map +1 -0
  78. package/dist/storage/implementation/v1/MongoChecksumsV1.d.ts +12 -0
  79. package/dist/storage/implementation/v1/MongoChecksumsV1.js +56 -0
  80. package/dist/storage/implementation/v1/MongoChecksumsV1.js.map +1 -0
  81. package/dist/storage/implementation/v1/MongoCompactorV1.d.ts +23 -0
  82. package/dist/storage/implementation/v1/MongoCompactorV1.js +52 -0
  83. package/dist/storage/implementation/v1/MongoCompactorV1.js.map +1 -0
  84. package/dist/storage/implementation/v1/MongoParameterCompactorV1.d.ts +9 -0
  85. package/dist/storage/implementation/v1/MongoParameterCompactorV1.js +20 -0
  86. package/dist/storage/implementation/v1/MongoParameterCompactorV1.js.map +1 -0
  87. package/dist/storage/implementation/v1/MongoSyncBucketStorageV1.d.ts +50 -0
  88. package/dist/storage/implementation/v1/MongoSyncBucketStorageV1.js +354 -0
  89. package/dist/storage/implementation/v1/MongoSyncBucketStorageV1.js.map +1 -0
  90. package/dist/storage/implementation/v1/PersistedBatchV1.d.ts +25 -0
  91. package/dist/storage/implementation/v1/PersistedBatchV1.js +183 -0
  92. package/dist/storage/implementation/v1/PersistedBatchV1.js.map +1 -0
  93. package/dist/storage/implementation/v1/SingleBucketStoreV1.d.ts +18 -0
  94. package/dist/storage/implementation/v1/SingleBucketStoreV1.js +57 -0
  95. package/dist/storage/implementation/v1/SingleBucketStoreV1.js.map +1 -0
  96. package/dist/storage/implementation/v1/SourceRecordStoreV1.d.ts +19 -0
  97. package/dist/storage/implementation/v1/SourceRecordStoreV1.js +105 -0
  98. package/dist/storage/implementation/v1/SourceRecordStoreV1.js.map +1 -0
  99. package/dist/storage/implementation/v1/VersionedPowerSyncMongoV1.d.ts +12 -0
  100. package/dist/storage/implementation/v1/VersionedPowerSyncMongoV1.js +20 -0
  101. package/dist/storage/implementation/v1/VersionedPowerSyncMongoV1.js.map +1 -0
  102. package/dist/storage/implementation/v1/models.d.ts +45 -0
  103. package/dist/storage/implementation/v1/models.js +37 -0
  104. package/dist/storage/implementation/v1/models.js.map +1 -0
  105. package/dist/storage/implementation/v3/MongoBucketBatchV3.d.ts +30 -0
  106. package/dist/storage/implementation/v3/MongoBucketBatchV3.js +463 -0
  107. package/dist/storage/implementation/v3/MongoBucketBatchV3.js.map +1 -0
  108. package/dist/storage/implementation/v3/MongoChecksumsV3.d.ts +15 -0
  109. package/dist/storage/implementation/v3/MongoChecksumsV3.js +84 -0
  110. package/dist/storage/implementation/v3/MongoChecksumsV3.js.map +1 -0
  111. package/dist/storage/implementation/v3/MongoCompactorV3.d.ts +23 -0
  112. package/dist/storage/implementation/v3/MongoCompactorV3.js +68 -0
  113. package/dist/storage/implementation/v3/MongoCompactorV3.js.map +1 -0
  114. package/dist/storage/implementation/v3/MongoParameterCompactorV3.d.ts +9 -0
  115. package/dist/storage/implementation/v3/MongoParameterCompactorV3.js +18 -0
  116. package/dist/storage/implementation/v3/MongoParameterCompactorV3.js.map +1 -0
  117. package/dist/storage/implementation/v3/MongoParameterLookupV3.d.ts +4 -0
  118. package/dist/storage/implementation/v3/MongoParameterLookupV3.js +9 -0
  119. package/dist/storage/implementation/v3/MongoParameterLookupV3.js.map +1 -0
  120. package/dist/storage/implementation/v3/MongoSyncBucketStorageV3.d.ts +63 -0
  121. package/dist/storage/implementation/v3/MongoSyncBucketStorageV3.js +508 -0
  122. package/dist/storage/implementation/v3/MongoSyncBucketStorageV3.js.map +1 -0
  123. package/dist/storage/implementation/v3/PersistedBatchV3.d.ts +28 -0
  124. package/dist/storage/implementation/v3/PersistedBatchV3.js +259 -0
  125. package/dist/storage/implementation/v3/PersistedBatchV3.js.map +1 -0
  126. package/dist/storage/implementation/v3/SingleBucketStoreV3.d.ts +18 -0
  127. package/dist/storage/implementation/v3/SingleBucketStoreV3.js +48 -0
  128. package/dist/storage/implementation/v3/SingleBucketStoreV3.js.map +1 -0
  129. package/dist/storage/implementation/v3/SourceRecordStoreV3.d.ts +22 -0
  130. package/dist/storage/implementation/v3/SourceRecordStoreV3.js +164 -0
  131. package/dist/storage/implementation/v3/SourceRecordStoreV3.js.map +1 -0
  132. package/dist/storage/implementation/v3/VersionedPowerSyncMongoV3.d.ts +22 -0
  133. package/dist/storage/implementation/v3/VersionedPowerSyncMongoV3.js +74 -0
  134. package/dist/storage/implementation/v3/VersionedPowerSyncMongoV3.js.map +1 -0
  135. package/dist/storage/implementation/v3/models.d.ts +101 -0
  136. package/dist/storage/implementation/v3/models.js +34 -0
  137. package/dist/storage/implementation/v3/models.js.map +1 -0
  138. package/dist/storage/storage-index.d.ts +6 -3
  139. package/dist/storage/storage-index.js +6 -3
  140. package/dist/storage/storage-index.js.map +1 -1
  141. package/dist/utils/util.d.ts +10 -3
  142. package/dist/utils/util.js +24 -3
  143. package/dist/utils/util.js.map +1 -1
  144. package/package.json +9 -9
  145. package/src/migrations/db/migrations/1688556755264-initial-sync-rules.ts +1 -1
  146. package/src/migrations/db/migrations/1702295701188-sync-rule-state.ts +7 -7
  147. package/src/storage/MongoBucketStorage.ts +254 -99
  148. package/src/storage/implementation/BucketDefinitionMapping.ts +75 -0
  149. package/src/storage/implementation/CheckpointState.ts +59 -0
  150. package/src/storage/implementation/MongoBucketBatch.ts +182 -490
  151. package/src/storage/implementation/MongoBucketBatchShared.ts +11 -0
  152. package/src/storage/implementation/MongoChecksums.ts +53 -75
  153. package/src/storage/implementation/MongoCompactor.ts +374 -404
  154. package/src/storage/implementation/MongoParameterCompactor.ts +37 -24
  155. package/src/storage/implementation/MongoPersistedSyncRules.ts +82 -0
  156. package/src/storage/implementation/MongoPersistedSyncRulesContent.ts +78 -16
  157. package/src/storage/implementation/MongoSyncBucketStorage.ts +179 -628
  158. package/src/storage/implementation/MongoSyncRulesLock.ts +20 -16
  159. package/src/storage/implementation/MongoWriteCheckpointAPI.ts +3 -1
  160. package/src/storage/implementation/OperationBatch.ts +1 -1
  161. package/src/storage/implementation/SyncRuleStateUpdate.ts +38 -0
  162. package/src/storage/implementation/common/BucketDataDoc.ts +37 -0
  163. package/src/storage/implementation/common/MongoSyncBucketStorageContext.ts +15 -0
  164. package/src/storage/implementation/common/PersistedBatch.ts +364 -0
  165. package/src/storage/implementation/common/SingleBucketStore.ts +63 -0
  166. package/src/storage/implementation/common/SourceRecordStore.ts +48 -0
  167. package/src/storage/implementation/common/VersionedPowerSyncMongoBase.ts +80 -0
  168. package/src/storage/implementation/createMongoSyncBucketStorage.ts +25 -0
  169. package/src/storage/implementation/db.ts +110 -131
  170. package/src/storage/implementation/models.ts +102 -79
  171. package/src/storage/implementation/v1/MongoBucketBatchV1.ts +509 -0
  172. package/src/storage/implementation/v1/MongoChecksumsV1.ts +75 -0
  173. package/src/storage/implementation/v1/MongoCompactorV1.ts +93 -0
  174. package/src/storage/implementation/v1/MongoParameterCompactorV1.ts +26 -0
  175. package/src/storage/implementation/v1/MongoSyncBucketStorageV1.ts +543 -0
  176. package/src/storage/implementation/v1/PersistedBatchV1.ts +229 -0
  177. package/src/storage/implementation/v1/SingleBucketStoreV1.ts +74 -0
  178. package/src/storage/implementation/v1/SourceRecordStoreV1.ts +156 -0
  179. package/src/storage/implementation/v1/VersionedPowerSyncMongoV1.ts +28 -0
  180. package/src/storage/implementation/v1/models.ts +99 -0
  181. package/src/storage/implementation/v3/MongoBucketBatchV3.ts +607 -0
  182. package/src/storage/implementation/v3/MongoChecksumsV3.ts +120 -0
  183. package/src/storage/implementation/v3/MongoCompactorV3.ts +107 -0
  184. package/src/storage/implementation/v3/MongoParameterCompactorV3.ts +24 -0
  185. package/src/storage/implementation/v3/MongoParameterLookupV3.ts +11 -0
  186. package/src/storage/implementation/v3/MongoSyncBucketStorageV3.ts +678 -0
  187. package/src/storage/implementation/v3/PersistedBatchV3.ts +317 -0
  188. package/src/storage/implementation/v3/SingleBucketStoreV3.ts +68 -0
  189. package/src/storage/implementation/v3/SourceRecordStoreV3.ts +226 -0
  190. package/src/storage/implementation/v3/VersionedPowerSyncMongoV3.ts +117 -0
  191. package/src/storage/implementation/v3/models.ts +164 -0
  192. package/src/storage/storage-index.ts +6 -3
  193. package/src/utils/util.ts +34 -5
  194. package/test/src/storage_compacting.test.ts +57 -29
  195. package/test/src/storage_sync.test.ts +767 -5
  196. package/test/src/storeCurrentData.test.ts +211 -0
  197. package/test/tsconfig.json +0 -1
  198. package/tsconfig.tsbuildinfo +1 -1
  199. package/dist/storage/implementation/PersistedBatch.d.ts +0 -71
  200. package/dist/storage/implementation/PersistedBatch.js +0 -354
  201. package/dist/storage/implementation/PersistedBatch.js.map +0 -1
  202. package/src/storage/implementation/PersistedBatch.ts +0 -432
@@ -3,7 +3,7 @@ import { mongo } from '@powersync/lib-service-mongodb';
3
3
  import {
4
4
  BaseObserver,
5
5
  DO_NOT_LOG,
6
- logger,
6
+ Logger,
7
7
  ReplicationAbortedError,
8
8
  ServiceAssertionError
9
9
  } from '@powersync/lib-services-framework';
@@ -11,48 +11,49 @@ import {
11
11
  BroadcastIterable,
12
12
  CHECKPOINT_INVALIDATE_ALL,
13
13
  CheckpointChanges,
14
- deserializeParameterLookup,
15
14
  GetCheckpointChangesOptions,
16
15
  InternalOpId,
17
- internalToExternalOpId,
18
- maxLsn,
19
16
  mergeAsyncIterables,
20
17
  PopulateChecksumCacheOptions,
21
18
  PopulateChecksumCacheResults,
22
- ProtocolOpId,
23
19
  ReplicationCheckpoint,
24
20
  storage,
25
21
  utils,
26
22
  WatchWriteCheckpointOptions
27
23
  } from '@powersync/service-core';
28
- import { JSONBig } from '@powersync/service-jsonbig';
29
- import { HydratedSyncRules, ScopedParameterLookup, SqliteJsonRow } from '@powersync/service-sync-rules';
24
+ import { HydratedSyncConfig, ParameterLookupRows, ScopedParameterLookup } from '@powersync/service-sync-rules';
30
25
  import * as bson from 'bson';
31
26
  import { LRUCache } from 'lru-cache';
32
27
  import * as timers from 'timers/promises';
33
- import { idPrefixFilter, mapOpEntry, readSingleBatch, setSessionSnapshotTime } from '../../utils/util.js';
28
+ import { retryOnMongoMaxTimeMSExpired } from '../../utils/util.js';
34
29
  import { MongoBucketStorage } from '../MongoBucketStorage.js';
35
- import { VersionedPowerSyncMongo } from './db.js';
36
- import {
37
- BucketDataDocument,
38
- BucketDataKey,
39
- BucketStateDocument,
40
- SourceKey,
41
- SourceTableDocument,
42
- StorageConfig
43
- } from './models.js';
44
- import { MongoBucketBatch } from './MongoBucketBatch.js';
30
+ import { MongoSyncBucketStorageContext } from './common/MongoSyncBucketStorageContext.js';
31
+ import type { VersionedPowerSyncMongo } from './db.js';
32
+ import { StorageConfig } from './models.js';
33
+ import { MongoBucketBatchOptions } from './MongoBucketBatch.js';
45
34
  import { MongoChecksumOptions, MongoChecksums } from './MongoChecksums.js';
46
- import { MongoCompactor } from './MongoCompactor.js';
35
+ import { MongoCompactOptions, MongoCompactor } from './MongoCompactor.js';
47
36
  import { MongoParameterCompactor } from './MongoParameterCompactor.js';
48
- import { MongoPersistedSyncRulesContent } from './MongoPersistedSyncRulesContent.js';
37
+ import { MongoPersistedSyncRulesContentV1 } from './MongoPersistedSyncRulesContent.js';
49
38
  import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js';
50
39
 
51
40
  export interface MongoSyncBucketStorageOptions {
52
- checksumOptions?: Omit<MongoChecksumOptions, 'storageConfig'>;
41
+ checksumOptions?: Omit<MongoChecksumOptions, 'storageConfig' | 'mapping'>;
53
42
  storageConfig: StorageConfig;
54
43
  }
55
44
 
45
+ interface InternalCheckpointChanges extends CheckpointChanges {
46
+ updatedWriteCheckpoints: Map<string, bigint>;
47
+ invalidateWriteCheckpoints: boolean;
48
+ }
49
+
50
+ interface WriterSyncState {
51
+ lastCheckpointLsn: string | null;
52
+ resumeFromLsn: string | null;
53
+ keepaliveOp: InternalOpId | null;
54
+ syncConfigId?: bson.ObjectId | null;
55
+ }
56
+
56
57
  /**
57
58
  * Only keep checkpoints around for a minute, before fetching a fresh one.
58
59
  *
@@ -64,43 +65,70 @@ export interface MongoSyncBucketStorageOptions {
64
65
  */
65
66
  const CHECKPOINT_TIMEOUT_MS = 60_000;
66
67
 
67
- export class MongoSyncBucketStorage
68
+ export abstract class MongoSyncBucketStorage
68
69
  extends BaseObserver<storage.SyncRulesBucketStorageListener>
69
70
  implements storage.SyncRulesBucketStorage
70
71
  {
72
+ readonly db: VersionedPowerSyncMongo;
71
73
  [DO_NOT_LOG] = true;
72
74
 
73
- private readonly db: VersionedPowerSyncMongo;
74
75
  readonly checksums: MongoChecksums;
75
76
 
76
- private parsedSyncRulesCache: { parsed: HydratedSyncRules; options: storage.ParseSyncRulesOptions } | undefined;
77
+ private parsedSyncRulesCache: { parsed: HydratedSyncConfig; options: storage.ParseSyncRulesOptions } | undefined;
77
78
  private writeCheckpointAPI: MongoWriteCheckpointAPI;
79
+ public readonly logger: Logger;
80
+ public readonly storageConfig: StorageConfig;
81
+ #storageInitialized = false;
78
82
 
79
83
  constructor(
80
84
  public readonly factory: MongoBucketStorage,
81
85
  public readonly group_id: number,
82
- private readonly sync_rules: MongoPersistedSyncRulesContent,
86
+ protected readonly sync_rules: MongoPersistedSyncRulesContentV1,
83
87
  public readonly slot_name: string,
84
88
  writeCheckpointMode: storage.WriteCheckpointMode | undefined,
85
89
  options: MongoSyncBucketStorageOptions
86
90
  ) {
87
91
  super();
88
- this.db = factory.db.versioned(sync_rules.getStorageConfig());
89
- this.checksums = new MongoChecksums(this.db, this.group_id, {
90
- ...options.checksumOptions,
91
- storageConfig: options?.storageConfig
92
- });
92
+ this.storageConfig = options.storageConfig;
93
+ this.db = factory.db.versioned(this.storageConfig);
94
+ this.checksums = this.createMongoChecksums(options);
93
95
  this.writeCheckpointAPI = new MongoWriteCheckpointAPI({
94
96
  db: this.db,
95
97
  mode: writeCheckpointMode ?? storage.WriteCheckpointMode.MANAGED,
96
98
  sync_rules_id: group_id
97
99
  });
100
+ this.logger = sync_rules.logger;
98
101
  }
99
102
 
103
+ /**
104
+ * Not for external use - public here for tests only.
105
+ *
106
+ * @internal
107
+ */
108
+ abstract createMongoCompactor(options: MongoCompactOptions): MongoCompactor;
109
+
110
+ protected abstract createMongoChecksums(options: MongoSyncBucketStorageOptions): MongoChecksums;
111
+ protected abstract createMongoParameterCompactor(
112
+ checkpoint: InternalOpId,
113
+ options: storage.CompactOptions
114
+ ): MongoParameterCompactor;
115
+
100
116
  get writeCheckpointMode() {
101
117
  return this.writeCheckpointAPI.writeCheckpointMode;
102
118
  }
103
119
 
120
+ get mapping() {
121
+ return this.sync_rules.mapping;
122
+ }
123
+
124
+ protected get versionContext(): MongoSyncBucketStorageContext {
125
+ return {
126
+ db: this.db,
127
+ group_id: this.group_id,
128
+ mapping: this.mapping
129
+ };
130
+ }
131
+
104
132
  setWriteCheckpointMode(mode: storage.WriteCheckpointMode): void {
105
133
  this.writeCheckpointAPI.setWriteCheckpointMode(mode);
106
134
  }
@@ -116,14 +144,10 @@ export class MongoSyncBucketStorage
116
144
  });
117
145
  }
118
146
 
119
- getParsedSyncRules(options: storage.ParseSyncRulesOptions): HydratedSyncRules {
147
+ getParsedSyncRules(options: storage.ParseSyncRulesOptions): HydratedSyncConfig {
120
148
  const { parsed, options: cachedOptions } = this.parsedSyncRulesCache ?? {};
121
- /**
122
- * Check if the cached sync rules, if present, had the same options.
123
- * Parse sync rules if the options are different or if there is no cached value.
124
- */
125
149
  if (!parsed || options.defaultSchema != cachedOptions?.defaultSchema) {
126
- this.parsedSyncRulesCache = { parsed: this.sync_rules.parsed(options).hydratedSyncRules(), options };
150
+ this.parsedSyncRulesCache = { parsed: this.sync_rules.parsed(options).hydratedSyncConfig(), options };
127
151
  }
128
152
 
129
153
  return this.parsedSyncRulesCache!.parsed;
@@ -133,72 +157,67 @@ export class MongoSyncBucketStorage
133
157
  return (await this.getCheckpointInternal()) ?? new EmptyReplicationCheckpoint();
134
158
  }
135
159
 
160
+ protected abstract fetchCheckpointState(
161
+ session: mongo.ClientSession
162
+ ): Promise<{ checkpoint: bigint; lsn: string | null } | null>;
163
+
136
164
  async getCheckpointInternal(): Promise<storage.ReplicationCheckpoint | null> {
137
165
  return await this.db.client.withSession({ snapshot: true }, async (session) => {
138
- const doc = await this.db.sync_rules.findOne(
139
- { _id: this.group_id },
140
- {
141
- session,
142
- projection: { _id: 1, state: 1, last_checkpoint: 1, last_checkpoint_lsn: 1, snapshot_done: 1 }
143
- }
144
- );
145
- if (!doc?.snapshot_done || !['ACTIVE', 'ERRORED'].includes(doc.state)) {
146
- // Sync rules not active - return null
166
+ const state = await this.fetchCheckpointState(session);
167
+ if (state == null) {
147
168
  return null;
148
169
  }
149
170
 
150
- // Specifically using operationTime instead of clusterTime
151
- // There are 3 fields in the response:
152
- // 1. operationTime, not exposed for snapshot sessions (used for causal consistency)
153
- // 2. clusterTime (used for connection management)
154
- // 3. atClusterTime, which is session.snapshotTime
155
- // We use atClusterTime, to match the driver's internal snapshot handling.
156
- // There are cases where clusterTime > operationTime and atClusterTime,
157
- // which could cause snapshot queries using this as the snapshotTime to timeout.
158
- // This was specifically observed on MongoDB 6.0 and 7.0.
159
171
  const snapshotTime = (session as any).snapshotTime as bson.Timestamp | undefined;
160
172
  if (snapshotTime == null) {
161
173
  throw new ServiceAssertionError('Missing snapshotTime in getCheckpoint()');
162
174
  }
163
- return new MongoReplicationCheckpoint(
164
- this,
165
- // null/0n is a valid checkpoint in some cases, for example if the initial snapshot was empty
166
- doc.last_checkpoint ?? 0n,
167
- doc.last_checkpoint_lsn ?? null,
168
- snapshotTime
169
- );
175
+ return new MongoReplicationCheckpoint(this, state.checkpoint, state.lsn, snapshotTime);
170
176
  });
171
177
  }
172
178
 
179
+ protected abstract initializeVersionStorage(): Promise<void>;
180
+
181
+ private async initializeStorage() {
182
+ if (this.#storageInitialized) {
183
+ return;
184
+ }
185
+
186
+ await this.db.initializeStreamStorage(this.group_id);
187
+ await this.initializeVersionStorage();
188
+ this.#storageInitialized = true;
189
+ }
190
+
191
+ protected abstract createWriterImpl(batchOptions: MongoBucketBatchOptions): storage.BucketStorageBatch;
192
+ protected abstract getWriterSyncState(): Promise<WriterSyncState>;
193
+
173
194
  async createWriter(options: storage.CreateWriterOptions): Promise<storage.BucketStorageBatch> {
174
- const doc = await this.db.sync_rules.findOne(
175
- {
176
- _id: this.group_id
177
- },
178
- { projection: { last_checkpoint_lsn: 1, no_checkpoint_before: 1, keepalive_op: 1, snapshot_lsn: 1 } }
179
- );
180
- const checkpoint_lsn = doc?.last_checkpoint_lsn ?? null;
195
+ await this.initializeStorage();
196
+
197
+ const state = await this.getWriterSyncState();
181
198
 
182
- const writer = new MongoBucketBatch({
183
- logger: options.logger,
199
+ const batchOptions: MongoBucketBatchOptions = {
200
+ logger: options.logger ?? this.logger,
184
201
  db: this.db,
185
- syncRules: this.sync_rules.parsed(options).hydratedSyncRules(),
202
+ syncRules: this.sync_rules.parsed(options).hydratedSyncConfig(),
203
+ mapping: this.sync_rules.mapping,
186
204
  groupId: this.group_id,
187
205
  slotName: this.slot_name,
188
- lastCheckpointLsn: checkpoint_lsn,
189
- resumeFromLsn: maxLsn(checkpoint_lsn, doc?.snapshot_lsn),
190
- keepaliveOp: doc?.keepalive_op ? BigInt(doc.keepalive_op) : null,
206
+ lastCheckpointLsn: state.lastCheckpointLsn,
207
+ resumeFromLsn: state.resumeFromLsn,
208
+ keepaliveOp: state.keepaliveOp,
191
209
  storeCurrentData: options.storeCurrentData,
192
210
  skipExistingRows: options.skipExistingRows ?? false,
193
- markRecordUnavailable: options.markRecordUnavailable
194
- });
211
+ markRecordUnavailable: options.markRecordUnavailable,
212
+ hooks: options.hooks,
213
+ syncConfigId: state.syncConfigId,
214
+ tracer: options.tracer
215
+ };
216
+ const writer = this.createWriterImpl(batchOptions);
195
217
  this.iterateListeners((cb) => cb.batchStarted?.(writer));
196
218
  return writer;
197
219
  }
198
220
 
199
- /**
200
- * @deprecated Use `createWriter()` with `await using` instead.
201
- */
202
221
  async startBatch(
203
222
  options: storage.CreateWriterOptions,
204
223
  callback: (batch: storage.BucketStorageBatch) => Promise<void>
@@ -209,321 +228,32 @@ export class MongoSyncBucketStorage
209
228
  return writer.last_flushed_op != null ? { flushed_op: writer.last_flushed_op } : null;
210
229
  }
211
230
 
212
- async resolveTable(options: storage.ResolveTableOptions): Promise<storage.ResolveTableResult> {
213
- const { group_id, connection_id, connection_tag, entity_descriptor } = options;
214
-
215
- const { schema, name, objectId, replicaIdColumns } = entity_descriptor;
216
-
217
- const normalizedReplicaIdColumns = replicaIdColumns.map((column) => ({
218
- name: column.name,
219
- type: column.type,
220
- type_oid: column.typeId
221
- }));
222
- let result: storage.ResolveTableResult | null = null;
223
- await this.db.client.withSession(async (session) => {
224
- const col = this.db.source_tables;
225
- let filter: Partial<SourceTableDocument> = {
226
- group_id: group_id,
227
- connection_id: connection_id,
228
- schema_name: schema,
229
- table_name: name,
230
- replica_id_columns2: normalizedReplicaIdColumns
231
- };
232
- if (objectId != null) {
233
- filter.relation_id = objectId;
234
- }
235
- let doc = await col.findOne(filter, { session });
236
- if (doc == null) {
237
- doc = {
238
- _id: new bson.ObjectId(),
239
- group_id: group_id,
240
- connection_id: connection_id,
241
- relation_id: objectId,
242
- schema_name: schema,
243
- table_name: name,
244
- replica_id_columns: null,
245
- replica_id_columns2: normalizedReplicaIdColumns,
246
- snapshot_done: false,
247
- snapshot_status: undefined
248
- };
249
-
250
- await col.insertOne(doc, { session });
251
- }
252
- const sourceTable = new storage.SourceTable({
253
- id: doc._id,
254
- connectionTag: connection_tag,
255
- objectId: objectId,
256
- schema: schema,
257
- name: name,
258
- replicaIdColumns: replicaIdColumns,
259
- snapshotComplete: doc.snapshot_done ?? true
260
- });
261
- sourceTable.syncEvent = options.sync_rules.tableTriggersEvent(sourceTable);
262
- sourceTable.syncData = options.sync_rules.tableSyncsData(sourceTable);
263
- sourceTable.syncParameters = options.sync_rules.tableSyncsParameters(sourceTable);
264
- sourceTable.snapshotStatus =
265
- doc.snapshot_status == null
266
- ? undefined
267
- : {
268
- lastKey: doc.snapshot_status.last_key?.buffer ?? null,
269
- totalEstimatedCount: doc.snapshot_status.total_estimated_count,
270
- replicatedCount: doc.snapshot_status.replicated_count
271
- };
272
-
273
- let dropTables: storage.SourceTable[] = [];
274
- // Detect tables that are either renamed, or have different replica_id_columns
275
- let truncateFilter = [{ schema_name: schema, table_name: name }] as any[];
276
- if (objectId != null) {
277
- // Only detect renames if the source uses relation ids.
278
- truncateFilter.push({ relation_id: objectId });
279
- }
280
- const truncate = await col
281
- .find(
282
- {
283
- group_id: group_id,
284
- connection_id: connection_id,
285
- _id: { $ne: doc._id },
286
- $or: truncateFilter
287
- },
288
- { session }
289
- )
290
- .toArray();
291
- dropTables = truncate.map(
292
- (doc) =>
293
- new storage.SourceTable({
294
- id: doc._id,
295
- connectionTag: connection_tag,
296
- objectId: doc.relation_id,
297
- schema: doc.schema_name,
298
- name: doc.table_name,
299
- replicaIdColumns:
300
- doc.replica_id_columns2?.map((c) => ({ name: c.name, typeOid: c.type_oid, type: c.type })) ?? [],
301
- snapshotComplete: doc.snapshot_done ?? true
302
- })
303
- );
304
-
305
- result = {
306
- table: sourceTable,
307
- dropTables: dropTables
308
- };
309
- });
310
- return result!;
311
- }
231
+ protected abstract getParameterSetsImpl(
232
+ checkpoint: MongoReplicationCheckpoint,
233
+ lookups: ScopedParameterLookup[],
234
+ limit: number
235
+ ): Promise<ParameterLookupRows[]>;
312
236
 
313
237
  async getParameterSets(
314
238
  checkpoint: MongoReplicationCheckpoint,
315
- lookups: ScopedParameterLookup[]
316
- ): Promise<SqliteJsonRow[]> {
317
- return this.db.client.withSession({ snapshot: true }, async (session) => {
318
- // Set the session's snapshot time to the checkpoint's snapshot time.
319
- // An alternative would be to create the session when the checkpoint is created, but managing
320
- // the session lifetime would become more complex.
321
- // Starting and ending sessions are cheap (synchronous when no transactions are used),
322
- // so this should be fine.
323
- // This is a roundabout way of setting {readConcern: {atClusterTime: clusterTime}}, since
324
- // that is not exposed directly by the driver.
325
- // Future versions of the driver may change the snapshotTime behavior, so we need tests to
326
- // validate that this works as expected. We test this in the compacting tests.
327
- setSessionSnapshotTime(session, checkpoint.snapshotTime);
328
- const lookupFilter = lookups.map((lookup) => {
329
- return storage.serializeLookup(lookup);
330
- });
331
- // This query does not use indexes super efficiently, apart from the lookup filter.
332
- // From some experimentation I could do individual lookups more efficient using an index
333
- // on {'key.g': 1, lookup: 1, 'key.t': 1, 'key.k': 1, _id: -1},
334
- // but could not do the same using $group.
335
- // For now, just rely on compacting to remove extraneous data.
336
- // For a description of the data format, see the `/docs/parameters-lookups.md` file.
337
- const rows = await this.db.bucket_parameters
338
- .aggregate(
339
- [
340
- {
341
- $match: {
342
- 'key.g': this.group_id,
343
- lookup: { $in: lookupFilter },
344
- _id: { $lte: checkpoint.checkpoint }
345
- }
346
- },
347
- {
348
- $sort: {
349
- _id: -1
350
- }
351
- },
352
- {
353
- $group: {
354
- _id: { key: '$key', lookup: '$lookup' },
355
- bucket_parameters: {
356
- $first: '$bucket_parameters'
357
- }
358
- }
359
- }
360
- ],
361
- {
362
- session,
363
- readConcern: 'snapshot',
364
- // Limit the time for the operation to complete, to avoid getting connection timeouts
365
- maxTimeMS: lib_mongo.db.MONGO_OPERATION_TIMEOUT_MS
366
- }
367
- )
368
- .toArray()
369
- .catch((e) => {
370
- throw lib_mongo.mapQueryError(e, 'while evaluating parameter queries');
371
- });
372
- const groupedParameters = rows.map((row) => {
373
- return row.bucket_parameters;
374
- });
375
- return groupedParameters.flat();
376
- });
239
+ lookups: ScopedParameterLookup[],
240
+ limit: number
241
+ ): Promise<ParameterLookupRows[]> {
242
+ return this.getParameterSetsImpl(checkpoint, lookups, limit);
377
243
  }
378
244
 
245
+ protected abstract getBucketDataBatchImpl(
246
+ checkpoint: utils.InternalOpId,
247
+ dataBuckets: storage.BucketDataRequest[],
248
+ options?: storage.BucketDataBatchOptions
249
+ ): AsyncIterable<storage.SyncBucketDataChunk>;
250
+
379
251
  async *getBucketDataBatch(
380
252
  checkpoint: utils.InternalOpId,
381
253
  dataBuckets: storage.BucketDataRequest[],
382
254
  options?: storage.BucketDataBatchOptions
383
255
  ): AsyncIterable<storage.SyncBucketDataChunk> {
384
- if (dataBuckets.length == 0) {
385
- return;
386
- }
387
- let filters: mongo.Filter<BucketDataDocument>[] = [];
388
- const bucketMap = new Map(dataBuckets.map((request) => [request.bucket, request.start]));
389
-
390
- if (checkpoint == null) {
391
- throw new ServiceAssertionError('checkpoint is null');
392
- }
393
- const end = checkpoint;
394
- for (let { bucket: name, start } of dataBuckets) {
395
- filters.push({
396
- _id: {
397
- $gt: {
398
- g: this.group_id,
399
- b: name,
400
- o: start
401
- },
402
- $lte: {
403
- g: this.group_id,
404
- b: name,
405
- o: end as any
406
- }
407
- }
408
- });
409
- }
410
-
411
- // Internal naming:
412
- // We do a query for one "batch", which may consist of multiple "chunks".
413
- // Each chunk is limited to single bucket, and is limited in length and size.
414
- // There are also overall batch length and size limits.
415
-
416
- const batchLimit = options?.limit ?? storage.DEFAULT_DOCUMENT_BATCH_LIMIT;
417
- const chunkSizeLimitBytes = options?.chunkLimitBytes ?? storage.DEFAULT_DOCUMENT_CHUNK_LIMIT_BYTES;
418
-
419
- const cursor = this.db.bucket_data.find(
420
- {
421
- $or: filters
422
- },
423
- {
424
- session: undefined,
425
- sort: { _id: 1 },
426
- limit: batchLimit,
427
- // Increase batch size above the default 101, so that we can fill an entire batch in
428
- // one go.
429
- // batchSize is 1 more than limit to auto-close the cursor.
430
- // See https://github.com/mongodb/node-mongodb-native/pull/4580
431
- batchSize: batchLimit + 1,
432
- // Raw mode is returns an array of Buffer instead of parsed documents.
433
- // We use it so that:
434
- // 1. We can calculate the document size accurately without serializing again.
435
- // 2. We can delay parsing the results until it's needed.
436
- // We manually use bson.deserialize below
437
- raw: true,
438
-
439
- // Limit the time for the operation to complete, to avoid getting connection timeouts
440
- maxTimeMS: lib_mongo.db.MONGO_OPERATION_TIMEOUT_MS
441
- }
442
- ) as unknown as mongo.FindCursor<Buffer>;
443
-
444
- // We want to limit results to a single batch to avoid high memory usage.
445
- // This approach uses MongoDB's batch limits to limit the data here, which limits
446
- // to the lower of the batch count and size limits.
447
- // This is similar to using `singleBatch: true` in the find options, but allows
448
- // detecting "hasMore".
449
- let { data, hasMore: batchHasMore } = await readSingleBatch(cursor).catch((e) => {
450
- throw lib_mongo.mapQueryError(e, 'while reading bucket data');
451
- });
452
- if (data.length == batchLimit) {
453
- // Limit reached - could have more data, despite the cursor being drained.
454
- batchHasMore = true;
455
- }
456
-
457
- let chunkSizeBytes = 0;
458
- let currentChunk: utils.SyncBucketData | null = null;
459
- let targetOp: InternalOpId | null = null;
460
-
461
- // Ordered by _id, meaning buckets are grouped together
462
- for (let rawData of data) {
463
- const row = bson.deserialize(rawData, storage.BSON_DESERIALIZE_INTERNAL_OPTIONS) as BucketDataDocument;
464
- const bucket = row._id.b;
465
-
466
- if (currentChunk == null || currentChunk.bucket != bucket || chunkSizeBytes >= chunkSizeLimitBytes) {
467
- // We need to start a new chunk
468
- let start: ProtocolOpId | undefined = undefined;
469
- if (currentChunk != null) {
470
- // There is an existing chunk we need to yield
471
- if (currentChunk.bucket == bucket) {
472
- // Current and new chunk have the same bucket, so need has_more on the current one.
473
- // If currentChunk.bucket != bucket, then we reached the end of the previous bucket,
474
- // and has_more = false in that case.
475
- currentChunk.has_more = true;
476
- start = currentChunk.next_after;
477
- }
478
-
479
- const yieldChunk = currentChunk;
480
- currentChunk = null;
481
- chunkSizeBytes = 0;
482
- yield { chunkData: yieldChunk, targetOp: targetOp };
483
- targetOp = null;
484
- }
485
-
486
- if (start == null) {
487
- const startOpId = bucketMap.get(bucket);
488
- if (startOpId == null) {
489
- throw new ServiceAssertionError(`data for unexpected bucket: ${bucket}`);
490
- }
491
- start = internalToExternalOpId(startOpId);
492
- }
493
- currentChunk = {
494
- bucket,
495
- after: start,
496
- has_more: false,
497
- data: [],
498
- next_after: start
499
- };
500
- targetOp = null;
501
- }
502
-
503
- const entry = mapOpEntry(row);
504
-
505
- if (row.target_op != null) {
506
- // MOVE, CLEAR
507
- if (targetOp == null || row.target_op > targetOp) {
508
- targetOp = row.target_op;
509
- }
510
- }
511
-
512
- currentChunk.data.push(entry);
513
- currentChunk.next_after = entry.op_id;
514
-
515
- chunkSizeBytes += rawData.byteLength;
516
- }
517
-
518
- if (currentChunk != null) {
519
- const yieldChunk = currentChunk;
520
- currentChunk = null;
521
- // This is the final chunk in the batch.
522
- // There may be more data if and only if the batch we retrieved isn't complete.
523
- yieldChunk.has_more = batchHasMore;
524
- yield { chunkData: yieldChunk, targetOp: targetOp };
525
- targetOp = null;
526
- }
256
+ yield* this.getBucketDataBatchImpl(checkpoint, dataBuckets, options);
527
257
  }
528
258
 
529
259
  async getChecksums(
@@ -537,130 +267,66 @@ export class MongoSyncBucketStorage
537
267
  this.checksums.clearCache();
538
268
  }
539
269
 
270
+ protected abstract terminateSyncRuleState(): Promise<void>;
271
+
540
272
  async terminate(options?: storage.TerminateOptions) {
541
- // Default is to clear the storage except when explicitly requested not to.
542
273
  if (!options || options?.clearStorage) {
543
274
  await this.clear(options);
544
275
  }
545
- await this.db.sync_rules.updateOne(
546
- {
547
- _id: this.group_id
548
- },
549
- {
550
- $set: {
551
- state: storage.SyncRuleState.TERMINATED,
552
- persisted_lsn: null,
553
- snapshot_done: false
554
- }
555
- }
556
- );
276
+ await this.terminateSyncRuleState();
557
277
  await this.db.notifyCheckpoint();
558
278
  }
559
279
 
560
- async getStatus(): Promise<storage.SyncRuleStatus> {
561
- const doc = await this.db.sync_rules.findOne(
562
- {
563
- _id: this.group_id
564
- },
565
- {
566
- projection: {
567
- snapshot_done: 1,
568
- last_checkpoint_lsn: 1,
569
- state: 1,
570
- snapshot_lsn: 1
571
- }
572
- }
573
- );
574
- if (doc == null) {
575
- throw new ServiceAssertionError('Cannot find sync rules status');
576
- }
280
+ protected abstract getStatusImpl(): Promise<storage.SyncRuleStatus>;
577
281
 
578
- return {
579
- snapshot_done: doc.snapshot_done,
580
- snapshot_lsn: doc.snapshot_lsn ?? null,
581
- active: doc.state == 'ACTIVE',
582
- checkpoint_lsn: doc.last_checkpoint_lsn
583
- };
282
+ async getStatus(): Promise<storage.SyncRuleStatus> {
283
+ return this.getStatusImpl();
584
284
  }
585
285
 
586
- async clear(options?: storage.ClearStorageOptions): Promise<void> {
587
- while (true) {
588
- if (options?.signal?.aborted) {
589
- throw new ReplicationAbortedError('Aborted clearing data', options.signal.reason);
590
- }
591
- try {
592
- await this.clearIteration();
286
+ protected abstract clearBucketData(signal?: AbortSignal): Promise<void>;
593
287
 
594
- logger.info(`${this.slot_name} Done clearing data`);
595
- return;
596
- } catch (e: unknown) {
597
- if (lib_mongo.isMongoServerError(e) && e.codeName == 'MaxTimeMSExpired') {
598
- logger.info(
599
- `${this.slot_name} Cleared batch of data in ${lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS}ms, continuing...`
600
- );
601
- await timers.setTimeout(lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS / 5);
602
- } else {
603
- throw e;
604
- }
605
- }
606
- }
607
- }
288
+ protected abstract clearParameterIndexes(signal?: AbortSignal): Promise<void>;
608
289
 
609
- private async clearIteration(): Promise<void> {
610
- // Individual operations here may time out with the maxTimeMS option.
611
- // It is expected to still make progress, and continue on the next try.
290
+ protected abstract clearSourceRecords(signal?: AbortSignal): Promise<void>;
612
291
 
613
- await this.db.sync_rules.updateOne(
614
- {
615
- _id: this.group_id
616
- },
617
- {
618
- $set: {
619
- snapshot_done: false,
620
- persisted_lsn: null,
621
- last_checkpoint_lsn: null,
622
- last_checkpoint: null,
623
- no_checkpoint_before: null
624
- },
625
- $unset: {
626
- snapshot_lsn: 1
627
- }
628
- },
629
- { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
630
- );
631
- await this.db.bucket_data.deleteMany(
632
- {
633
- _id: idPrefixFilter<BucketDataKey>({ g: this.group_id }, ['b', 'o'])
634
- },
635
- { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
636
- );
637
- await this.db.bucket_parameters.deleteMany(
638
- {
639
- 'key.g': this.group_id
640
- },
641
- { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
642
- );
292
+ protected abstract clearBucketState(signal?: AbortSignal): Promise<void>;
643
293
 
644
- await this.db.common_current_data.deleteMany(
645
- {
646
- _id: idPrefixFilter<SourceKey>({ g: this.group_id }, ['t', 'k'])
647
- },
648
- { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
649
- );
294
+ protected abstract clearSourceTables(signal?: AbortSignal): Promise<void>;
295
+ protected abstract clearSyncRuleState(): Promise<void>;
650
296
 
651
- await this.db.bucket_state.deleteMany(
652
- {
653
- _id: idPrefixFilter<BucketStateDocument['_id']>({ g: this.group_id }, ['b'])
654
- },
655
- { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
656
- );
297
+ async clear(options?: storage.ClearStorageOptions): Promise<void> {
298
+ const signal = options?.signal;
657
299
 
658
- await this.db.source_tables.deleteMany(
659
- {
660
- group_id: this.group_id
661
- },
662
- { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
663
- );
300
+ if (signal?.aborted) {
301
+ throw new ReplicationAbortedError('Aborted clearing data', signal.reason);
302
+ }
303
+
304
+ await this.clearSyncRuleState();
305
+
306
+ await this.clearBucketData(signal);
307
+ await this.clearParameterIndexes(signal);
308
+ await this.clearSourceRecords(signal);
309
+ await this.clearBucketState(signal);
310
+ await this.clearSourceTables(signal);
311
+
312
+ this.#storageInitialized = false;
313
+ }
314
+
315
+ protected async clearDeleteMany(
316
+ label: string,
317
+ operation: () => Promise<mongo.DeleteResult>,
318
+ signal?: AbortSignal
319
+ ): Promise<void> {
320
+ await retryOnMongoMaxTimeMSExpired(operation, {
321
+ signal,
322
+ abortMessage: 'Aborted clearing data',
323
+ retryDelayMs: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS / 5,
324
+ onRetry: () => {
325
+ this.logger.info(
326
+ `Cleared batch of ${label} in ${lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS}ms, continuing...`
327
+ );
328
+ }
329
+ });
664
330
  }
665
331
 
666
332
  async reportError(e: any): Promise<void> {
@@ -684,100 +350,68 @@ export class MongoSyncBucketStorage
684
350
  const checkpoint = await this.getCheckpointInternal();
685
351
  maxOpId = checkpoint?.checkpoint ?? undefined;
686
352
  }
687
- await new MongoCompactor(this, this.db, { ...options, maxOpId }).compact();
353
+ await this.createMongoCompactor({ ...options, maxOpId, logger: this.logger }).compact();
688
354
 
689
355
  if (maxOpId != null && options?.compactParameterData) {
690
- await new MongoParameterCompactor(this.db, this.group_id, maxOpId, options).compact();
356
+ await this.createMongoParameterCompactor(maxOpId, options).compact();
691
357
  }
692
358
  }
693
359
 
694
360
  async populatePersistentChecksumCache(options: PopulateChecksumCacheOptions): Promise<PopulateChecksumCacheResults> {
695
- logger.info(`Populating persistent checksum cache...`);
361
+ this.logger.info(`Populating persistent checksum cache...`);
696
362
  const start = Date.now();
697
- // We do a minimal compact here.
698
- // We can optimize this in the future.
699
- const compactor = new MongoCompactor(this, this.db, {
363
+ const compactor = this.createMongoCompactor({
700
364
  ...options,
701
- // Don't track updates for MOVE compacting
702
- memoryLimitMB: 0
365
+ memoryLimitMB: 0,
366
+ logger: this.logger
703
367
  });
704
368
 
705
369
  const result = await compactor.populateChecksums({
706
- // There are cases with millions of small buckets, in which case it can take very long to
707
- // populate the checksums, with minimal benefit. We skip the small buckets here.
708
370
  minBucketChanges: options.minBucketChanges ?? 10
709
371
  });
710
372
  const duration = Date.now() - start;
711
- logger.info(`Populated persistent checksum cache in ${(duration / 1000).toFixed(1)}s`);
373
+ this.logger.info(`Populated persistent checksum cache in ${(duration / 1000).toFixed(1)}s`);
712
374
  return result;
713
375
  }
714
376
 
715
- /**
716
- * Instance-wide watch on the latest available checkpoint (op_id + lsn).
717
- */
718
377
  private async *watchActiveCheckpoint(signal: AbortSignal): AsyncIterable<ReplicationCheckpoint> {
719
378
  if (signal.aborted) {
720
379
  return;
721
380
  }
722
381
 
723
- // If the stream is idle, we wait a max of a minute (CHECKPOINT_TIMEOUT_MS) before we get another checkpoint,
724
- // to avoid stale checkpoint snapshots. This is what checkpointTimeoutStream() is for.
725
- // Essentially, even if there are no actual checkpoint changes, we want a new snapshotTime every minute or so,
726
- // to ensure that any new clients connecting will get a valid snapshotTime.
727
382
  const stream = mergeAsyncIterables(
728
383
  [this.checkpointChangesStream(signal), this.checkpointTimeoutStream(signal)],
729
384
  signal
730
385
  );
731
386
 
732
- // We only watch changes to the active sync rules.
733
- // If it changes to inactive, we abort and restart with the new sync rules.
734
387
  for await (const _ of stream) {
735
388
  if (signal.aborted) {
736
- // Would likely have been caught by the signal on the timeout or the upstream stream, but we check here anyway
737
389
  break;
738
390
  }
739
391
 
740
392
  const op = await this.getCheckpointInternal();
741
393
  if (op == null) {
742
- // Sync rules have changed - abort and restart.
743
- // We do a soft close of the stream here - no error
744
394
  break;
745
395
  }
746
396
 
747
- // Previously, we only yielded when the checkpoint or lsn changed.
748
- // However, we always want to use the latest snapshotTime, so we skip that filtering here.
749
- // That filtering could be added in the per-user streams if needed, but in general the capped collection
750
- // should already only contain useful changes in most cases.
751
397
  yield op;
752
398
  }
753
399
  }
754
400
 
755
- // Nothing is done here until a subscriber starts to iterate
756
401
  private readonly sharedIter = new BroadcastIterable((signal) => {
757
402
  return this.watchActiveCheckpoint(signal);
758
403
  });
759
404
 
760
- /**
761
- * User-specific watch on the latest checkpoint and/or write checkpoint.
762
- */
763
405
  async *watchCheckpointChanges(options: WatchWriteCheckpointOptions): AsyncIterable<storage.StorageCheckpointUpdate> {
764
406
  let lastCheckpoint: ReplicationCheckpoint | null = null;
765
407
 
766
408
  const iter = this.sharedIter[Symbol.asyncIterator](options.signal);
767
409
 
768
410
  let writeCheckpoint: bigint | null = null;
769
- // true if we queried the initial write checkpoint, even if it doesn't exist
770
411
  let queriedInitialWriteCheckpoint = false;
771
412
 
772
413
  for await (const nextCheckpoint of iter) {
773
- // lsn changes are not important by itself.
774
- // What is important is:
775
- // 1. checkpoint (op_id) changes.
776
- // 2. write checkpoint changes for the specific user
777
-
778
414
  if (nextCheckpoint.lsn != null && !queriedInitialWriteCheckpoint) {
779
- // Lookup the first write checkpoint for the user when we can.
780
- // There will not actually be one in all cases.
781
415
  writeCheckpoint = await this.writeCheckpointAPI.lastWriteCheckpoint({
782
416
  sync_rules_id: this.group_id,
783
417
  user_id: options.user_id,
@@ -793,15 +427,11 @@ export class MongoSyncBucketStorage
793
427
  lastCheckpoint.checkpoint == nextCheckpoint.checkpoint &&
794
428
  lastCheckpoint.lsn == nextCheckpoint.lsn
795
429
  ) {
796
- // No change - wait for next one
797
- // In some cases, many LSNs may be produced in a short time.
798
- // Add a delay to throttle the loop a bit.
799
430
  await timers.setTimeout(20 + 10 * Math.random());
800
431
  continue;
801
432
  }
802
433
 
803
434
  if (lastCheckpoint == null) {
804
- // First message for this stream - "INVALIDATE_ALL" means it will lookup all data
805
435
  yield {
806
436
  base: nextCheckpoint,
807
437
  writeCheckpoint,
@@ -815,8 +445,6 @@ export class MongoSyncBucketStorage
815
445
 
816
446
  let updatedWriteCheckpoint = updates.updatedWriteCheckpoints.get(options.user_id) ?? null;
817
447
  if (updates.invalidateWriteCheckpoints) {
818
- // Invalidated means there were too many updates to track the individual ones,
819
- // so we switch to "polling" (querying directly in each stream).
820
448
  updatedWriteCheckpoint = await this.writeCheckpointAPI.lastWriteCheckpoint({
821
449
  sync_rules_id: this.group_id,
822
450
  user_id: options.user_id,
@@ -827,8 +455,6 @@ export class MongoSyncBucketStorage
827
455
  }
828
456
  if (updatedWriteCheckpoint != null && (writeCheckpoint == null || updatedWriteCheckpoint > writeCheckpoint)) {
829
457
  writeCheckpoint = updatedWriteCheckpoint;
830
- // If it happened that we haven't queried a write checkpoint at this point,
831
- // then we don't need to anymore, since we got an updated one.
832
458
  queriedInitialWriteCheckpoint = true;
833
459
  }
834
460
 
@@ -848,12 +474,6 @@ export class MongoSyncBucketStorage
848
474
  }
849
475
  }
850
476
 
851
- /**
852
- * This watches the checkpoint_events capped collection for new documents inserted,
853
- * and yields whenever one or more documents are inserted.
854
- *
855
- * The actual checkpoint must be queried on the sync_rules collection after this.
856
- */
857
477
  private async *checkpointChangesStream(signal: AbortSignal): AsyncGenerator<void> {
858
478
  if (signal.aborted) {
859
479
  return;
@@ -872,17 +492,13 @@ export class MongoSyncBucketStorage
872
492
  cursor.close().catch(() => {});
873
493
  });
874
494
 
875
- // Yield once on start, regardless of whether there are documents in the cursor.
876
- // This is to ensure that the first iteration of the generator yields immediately.
877
495
  yield;
878
496
 
879
497
  try {
880
498
  while (!signal.aborted) {
881
499
  const doc = await cursor.tryNext().catch((e) => {
882
500
  if (lib_mongo.isMongoServerError(e) && e.codeName === 'CappedPositionLost') {
883
- // Cursor position lost, potentially due to a high rate of notifications
884
501
  cursor = query();
885
- // Treat as an event found, before querying the new cursor again
886
502
  return {};
887
503
  } else {
888
504
  return Promise.reject(e);
@@ -891,8 +507,6 @@ export class MongoSyncBucketStorage
891
507
  if (cursor.closed) {
892
508
  return;
893
509
  }
894
- // Skip buffered documents, if any. We don't care about the contents,
895
- // we only want to know when new documents are inserted.
896
510
  cursor.readBufferedDocuments();
897
511
  if (doc != null) {
898
512
  yield;
@@ -914,7 +528,6 @@ export class MongoSyncBucketStorage
914
528
  await timers.setTimeout(CHECKPOINT_TIMEOUT_MS, undefined, { signal });
915
529
  } catch (e) {
916
530
  if (e.name == 'AbortError') {
917
- // This is how we typically abort this stream, when all listeners are done
918
531
  return;
919
532
  }
920
533
  throw e;
@@ -926,94 +539,37 @@ export class MongoSyncBucketStorage
926
539
  }
927
540
  }
928
541
 
542
+ protected abstract getDataBucketChangesImpl(
543
+ options: GetCheckpointChangesOptions
544
+ ): Promise<Pick<CheckpointChanges, 'updatedDataBuckets' | 'invalidateDataBuckets'>>;
545
+
929
546
  private async getDataBucketChanges(
930
547
  options: GetCheckpointChangesOptions
931
548
  ): Promise<Pick<CheckpointChanges, 'updatedDataBuckets' | 'invalidateDataBuckets'>> {
932
- const limit = 1000;
933
- const bucketStateUpdates = await this.db.bucket_state
934
- .find(
935
- {
936
- // We have an index on (_id.g, last_op).
937
- '_id.g': this.group_id,
938
- last_op: { $gt: options.lastCheckpoint.checkpoint }
939
- },
940
- {
941
- projection: {
942
- '_id.b': 1
943
- },
944
- limit: limit + 1,
945
- // batchSize is 1 more than limit to auto-close the cursor.
946
- // See https://github.com/mongodb/node-mongodb-native/pull/4580
947
- batchSize: limit + 2,
948
- singleBatch: true
949
- }
950
- )
951
- .toArray();
952
-
953
- const buckets = bucketStateUpdates.map((doc) => doc._id.b);
954
- const invalidateDataBuckets = buckets.length > limit;
955
-
956
- return {
957
- invalidateDataBuckets: invalidateDataBuckets,
958
- updatedDataBuckets: invalidateDataBuckets ? new Set<string>() : new Set(buckets)
959
- };
549
+ return this.getDataBucketChangesImpl(options);
960
550
  }
961
551
 
552
+ protected abstract getParameterBucketChangesImpl(
553
+ options: GetCheckpointChangesOptions
554
+ ): Promise<Pick<CheckpointChanges, 'updatedParameterLookups' | 'invalidateParameterBuckets'>>;
555
+
962
556
  private async getParameterBucketChanges(
963
557
  options: GetCheckpointChangesOptions
964
558
  ): Promise<Pick<CheckpointChanges, 'updatedParameterLookups' | 'invalidateParameterBuckets'>> {
965
- const limit = 1000;
966
- const parameterUpdates = await this.db.bucket_parameters
967
- .find(
968
- {
969
- _id: { $gt: options.lastCheckpoint.checkpoint, $lte: options.nextCheckpoint.checkpoint },
970
- 'key.g': this.group_id
971
- },
972
- {
973
- projection: {
974
- lookup: 1
975
- },
976
- limit: limit + 1,
977
- // batchSize is 1 more than limit to auto-close the cursor.
978
- // See https://github.com/mongodb/node-mongodb-native/pull/4580
979
- batchSize: limit + 2,
980
- singleBatch: true
981
- }
982
- )
983
- .toArray();
984
- const invalidateParameterUpdates = parameterUpdates.length > limit;
985
-
986
- return {
987
- invalidateParameterBuckets: invalidateParameterUpdates,
988
- updatedParameterLookups: invalidateParameterUpdates
989
- ? new Set<string>()
990
- : new Set<string>(parameterUpdates.map((p) => JSONBig.stringify(deserializeParameterLookup(p.lookup))))
991
- };
559
+ return this.getParameterBucketChangesImpl(options);
992
560
  }
993
561
 
994
- // If we processed all connections together for each checkpoint, we could do a single lookup for all connections.
995
- // In practice, specific connections may fall behind. So instead, we just cache the results of each specific lookup.
996
- // TODO (later):
997
- // We can optimize this by implementing it like ChecksumCache: We can use partial cache results to do
998
- // more efficient lookups in some cases.
999
562
  private checkpointChangesCache = new LRUCache<
1000
563
  string,
1001
564
  InternalCheckpointChanges,
1002
565
  { options: GetCheckpointChangesOptions }
1003
566
  >({
1004
- // Limit to 50 cache entries, or 10MB, whichever comes first.
1005
- // Some rough calculations:
1006
- // If we process 10 checkpoints per second, and a connection may be 2 seconds behind, we could have
1007
- // up to 20 relevant checkpoints. That gives us 20*20 = 400 potentially-relevant cache entries.
1008
- // That is a worst-case scenario, so we don't actually store that many. In real life, the cache keys
1009
- // would likely be clustered around a few values, rather than spread over all 400 potential values.
1010
567
  max: 50,
1011
568
  maxSize: 12 * 1024 * 1024,
1012
569
  sizeCalculation: (value: InternalCheckpointChanges) => {
1013
- // Estimate of memory usage
1014
570
  const paramSize = [...value.updatedParameterLookups].reduce<number>((a, b) => a + b.length, 0);
1015
571
  const bucketSize = [...value.updatedDataBuckets].reduce<number>((a, b) => a + b.length, 0);
1016
- const writeCheckpointSize = value.updatedWriteCheckpoints.size * 30; // estiamte for user_id + bigint
572
+ const writeCheckpointSize = value.updatedWriteCheckpoints.size * 30;
1017
573
  return 100 + paramSize + bucketSize + writeCheckpointSize;
1018
574
  },
1019
575
  fetchMethod: async (_key, _staleValue, options) => {
@@ -1040,11 +596,6 @@ export class MongoSyncBucketStorage
1040
596
  }
1041
597
  }
1042
598
 
1043
- interface InternalCheckpointChanges extends CheckpointChanges {
1044
- updatedWriteCheckpoints: Map<string, bigint>;
1045
- invalidateWriteCheckpoints: boolean;
1046
- }
1047
-
1048
599
  class MongoReplicationCheckpoint implements ReplicationCheckpoint {
1049
600
  #storage: MongoSyncBucketStorage;
1050
601
 
@@ -1057,8 +608,8 @@ class MongoReplicationCheckpoint implements ReplicationCheckpoint {
1057
608
  this.#storage = storage;
1058
609
  }
1059
610
 
1060
- async getParameterSets(lookups: ScopedParameterLookup[]): Promise<SqliteJsonRow[]> {
1061
- return this.#storage.getParameterSets(this, lookups);
611
+ async getParameterSets(lookups: ScopedParameterLookup[], limit: number): Promise<ParameterLookupRows[]> {
612
+ return this.#storage.getParameterSets(this, lookups, limit);
1062
613
  }
1063
614
  }
1064
615
 
@@ -1066,7 +617,7 @@ class EmptyReplicationCheckpoint implements ReplicationCheckpoint {
1066
617
  readonly checkpoint: InternalOpId = 0n;
1067
618
  readonly lsn: string | null = null;
1068
619
 
1069
- async getParameterSets(lookups: ScopedParameterLookup[]): Promise<SqliteJsonRow[]> {
620
+ async getParameterSets(_lookups: ScopedParameterLookup[]): Promise<ParameterLookupRows[]> {
1070
621
  return [];
1071
622
  }
1072
623
  }