@powersync/service-module-mongodb-storage 0.15.3 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. package/CHANGELOG.md +54 -0
  2. package/dist/migrations/db/migrations/1688556755264-initial-sync-rules.js +1 -1
  3. package/dist/migrations/db/migrations/1688556755264-initial-sync-rules.js.map +1 -1
  4. package/dist/migrations/db/migrations/1702295701188-sync-rule-state.js +3 -3
  5. package/dist/migrations/db/migrations/1702295701188-sync-rule-state.js.map +1 -1
  6. package/dist/migrations/db/migrations/1770213298299-storage-version.js.map +1 -1
  7. package/dist/storage/MongoBucketStorage.d.ts +5 -3
  8. package/dist/storage/MongoBucketStorage.js +50 -36
  9. package/dist/storage/MongoBucketStorage.js.map +1 -1
  10. package/dist/storage/MongoReportStorage.js.map +1 -1
  11. package/dist/storage/implementation/BucketDefinitionMapping.d.ts +17 -0
  12. package/dist/storage/implementation/BucketDefinitionMapping.js +58 -0
  13. package/dist/storage/implementation/BucketDefinitionMapping.js.map +1 -0
  14. package/dist/storage/implementation/MongoBucketBatch.d.ts +16 -14
  15. package/dist/storage/implementation/MongoBucketBatch.js +80 -115
  16. package/dist/storage/implementation/MongoBucketBatch.js.map +1 -1
  17. package/dist/storage/implementation/MongoBucketBatchShared.d.ts +5 -0
  18. package/dist/storage/implementation/MongoBucketBatchShared.js +8 -0
  19. package/dist/storage/implementation/MongoBucketBatchShared.js.map +1 -0
  20. package/dist/storage/implementation/MongoChecksums.d.ts +28 -17
  21. package/dist/storage/implementation/MongoChecksums.js +13 -72
  22. package/dist/storage/implementation/MongoChecksums.js.map +1 -1
  23. package/dist/storage/implementation/MongoCompactor.d.ts +98 -58
  24. package/dist/storage/implementation/MongoCompactor.js +229 -296
  25. package/dist/storage/implementation/MongoCompactor.js.map +1 -1
  26. package/dist/storage/implementation/MongoParameterCompactor.d.ts +11 -6
  27. package/dist/storage/implementation/MongoParameterCompactor.js +11 -8
  28. package/dist/storage/implementation/MongoParameterCompactor.js.map +1 -1
  29. package/dist/storage/implementation/MongoPersistedSyncRules.d.ts +14 -0
  30. package/dist/storage/implementation/MongoPersistedSyncRules.js +64 -0
  31. package/dist/storage/implementation/MongoPersistedSyncRules.js.map +1 -0
  32. package/dist/storage/implementation/MongoPersistedSyncRulesContent.d.ts +3 -0
  33. package/dist/storage/implementation/MongoPersistedSyncRulesContent.js +9 -0
  34. package/dist/storage/implementation/MongoPersistedSyncRulesContent.js.map +1 -1
  35. package/dist/storage/implementation/MongoStorageProvider.js +1 -1
  36. package/dist/storage/implementation/MongoStorageProvider.js.map +1 -1
  37. package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +49 -30
  38. package/dist/storage/implementation/MongoSyncBucketStorage.js +96 -388
  39. package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
  40. package/dist/storage/implementation/MongoSyncRulesLock.d.ts +5 -3
  41. package/dist/storage/implementation/MongoSyncRulesLock.js +12 -10
  42. package/dist/storage/implementation/MongoSyncRulesLock.js.map +1 -1
  43. package/dist/storage/implementation/MongoWriteCheckpointAPI.js +1 -1
  44. package/dist/storage/implementation/MongoWriteCheckpointAPI.js.map +1 -1
  45. package/dist/storage/implementation/OperationBatch.js +1 -1
  46. package/dist/storage/implementation/common/BucketDataDoc.d.ts +35 -0
  47. package/dist/storage/implementation/common/BucketDataDoc.js +2 -0
  48. package/dist/storage/implementation/common/BucketDataDoc.js.map +1 -0
  49. package/dist/storage/implementation/common/MongoSyncBucketStorageContext.d.ts +13 -0
  50. package/dist/storage/implementation/common/MongoSyncBucketStorageContext.js +2 -0
  51. package/dist/storage/implementation/common/MongoSyncBucketStorageContext.js.map +1 -0
  52. package/dist/storage/implementation/common/PersistedBatch.d.ts +108 -0
  53. package/dist/storage/implementation/common/PersistedBatch.js +237 -0
  54. package/dist/storage/implementation/common/PersistedBatch.js.map +1 -0
  55. package/dist/storage/implementation/common/SingleBucketStore.d.ts +54 -0
  56. package/dist/storage/implementation/common/SingleBucketStore.js +3 -0
  57. package/dist/storage/implementation/common/SingleBucketStore.js.map +1 -0
  58. package/dist/storage/implementation/common/SourceRecordStore.d.ts +36 -0
  59. package/dist/storage/implementation/common/SourceRecordStore.js +2 -0
  60. package/dist/storage/implementation/common/SourceRecordStore.js.map +1 -0
  61. package/dist/storage/implementation/common/VersionedPowerSyncMongoBase.d.ts +27 -0
  62. package/dist/storage/implementation/common/VersionedPowerSyncMongoBase.js +57 -0
  63. package/dist/storage/implementation/common/VersionedPowerSyncMongoBase.js.map +1 -0
  64. package/dist/storage/implementation/createMongoSyncBucketStorage.d.ts +7 -0
  65. package/dist/storage/implementation/createMongoSyncBucketStorage.js +9 -0
  66. package/dist/storage/implementation/createMongoSyncBucketStorage.js.map +1 -0
  67. package/dist/storage/implementation/db.d.ts +34 -34
  68. package/dist/storage/implementation/db.js +78 -98
  69. package/dist/storage/implementation/db.js.map +1 -1
  70. package/dist/storage/implementation/models.d.ts +63 -34
  71. package/dist/storage/implementation/models.js +21 -2
  72. package/dist/storage/implementation/models.js.map +1 -1
  73. package/dist/storage/implementation/v1/MongoBucketBatchV1.d.ts +13 -0
  74. package/dist/storage/implementation/v1/MongoBucketBatchV1.js +22 -0
  75. package/dist/storage/implementation/v1/MongoBucketBatchV1.js.map +1 -0
  76. package/dist/storage/implementation/v1/MongoChecksumsV1.d.ts +12 -0
  77. package/dist/storage/implementation/v1/MongoChecksumsV1.js +56 -0
  78. package/dist/storage/implementation/v1/MongoChecksumsV1.js.map +1 -0
  79. package/dist/storage/implementation/v1/MongoCompactorV1.d.ts +23 -0
  80. package/dist/storage/implementation/v1/MongoCompactorV1.js +52 -0
  81. package/dist/storage/implementation/v1/MongoCompactorV1.js.map +1 -0
  82. package/dist/storage/implementation/v1/MongoParameterCompactorV1.d.ts +9 -0
  83. package/dist/storage/implementation/v1/MongoParameterCompactorV1.js +20 -0
  84. package/dist/storage/implementation/v1/MongoParameterCompactorV1.js.map +1 -0
  85. package/dist/storage/implementation/v1/MongoSyncBucketStorageV1.d.ts +41 -0
  86. package/dist/storage/implementation/v1/MongoSyncBucketStorageV1.js +283 -0
  87. package/dist/storage/implementation/v1/MongoSyncBucketStorageV1.js.map +1 -0
  88. package/dist/storage/implementation/v1/PersistedBatchV1.d.ts +26 -0
  89. package/dist/storage/implementation/v1/PersistedBatchV1.js +183 -0
  90. package/dist/storage/implementation/v1/PersistedBatchV1.js.map +1 -0
  91. package/dist/storage/implementation/v1/SingleBucketStoreV1.d.ts +18 -0
  92. package/dist/storage/implementation/v1/SingleBucketStoreV1.js +57 -0
  93. package/dist/storage/implementation/v1/SingleBucketStoreV1.js.map +1 -0
  94. package/dist/storage/implementation/v1/SourceRecordStoreV1.d.ts +19 -0
  95. package/dist/storage/implementation/v1/SourceRecordStoreV1.js +105 -0
  96. package/dist/storage/implementation/v1/SourceRecordStoreV1.js.map +1 -0
  97. package/dist/storage/implementation/v1/VersionedPowerSyncMongoV1.d.ts +12 -0
  98. package/dist/storage/implementation/v1/VersionedPowerSyncMongoV1.js +20 -0
  99. package/dist/storage/implementation/v1/VersionedPowerSyncMongoV1.js.map +1 -0
  100. package/dist/storage/implementation/v1/models.d.ts +34 -0
  101. package/dist/storage/implementation/v1/models.js +37 -0
  102. package/dist/storage/implementation/v1/models.js.map +1 -0
  103. package/dist/storage/implementation/v3/MongoBucketBatchV3.d.ts +13 -0
  104. package/dist/storage/implementation/v3/MongoBucketBatchV3.js +34 -0
  105. package/dist/storage/implementation/v3/MongoBucketBatchV3.js.map +1 -0
  106. package/dist/storage/implementation/v3/MongoChecksumsV3.d.ts +15 -0
  107. package/dist/storage/implementation/v3/MongoChecksumsV3.js +84 -0
  108. package/dist/storage/implementation/v3/MongoChecksumsV3.js.map +1 -0
  109. package/dist/storage/implementation/v3/MongoCompactorV3.d.ts +23 -0
  110. package/dist/storage/implementation/v3/MongoCompactorV3.js +68 -0
  111. package/dist/storage/implementation/v3/MongoCompactorV3.js.map +1 -0
  112. package/dist/storage/implementation/v3/MongoParameterCompactorV3.d.ts +9 -0
  113. package/dist/storage/implementation/v3/MongoParameterCompactorV3.js +18 -0
  114. package/dist/storage/implementation/v3/MongoParameterCompactorV3.js.map +1 -0
  115. package/dist/storage/implementation/v3/MongoParameterLookupV3.d.ts +5 -0
  116. package/dist/storage/implementation/v3/MongoParameterLookupV3.js +9 -0
  117. package/dist/storage/implementation/v3/MongoParameterLookupV3.js.map +1 -0
  118. package/dist/storage/implementation/v3/MongoSyncBucketStorageV3.d.ts +41 -0
  119. package/dist/storage/implementation/v3/MongoSyncBucketStorageV3.js +407 -0
  120. package/dist/storage/implementation/v3/MongoSyncBucketStorageV3.js.map +1 -0
  121. package/dist/storage/implementation/v3/PersistedBatchV3.d.ts +29 -0
  122. package/dist/storage/implementation/v3/PersistedBatchV3.js +259 -0
  123. package/dist/storage/implementation/v3/PersistedBatchV3.js.map +1 -0
  124. package/dist/storage/implementation/v3/SingleBucketStoreV3.d.ts +18 -0
  125. package/dist/storage/implementation/v3/SingleBucketStoreV3.js +48 -0
  126. package/dist/storage/implementation/v3/SingleBucketStoreV3.js.map +1 -0
  127. package/dist/storage/implementation/v3/SourceRecordStoreV3.d.ts +22 -0
  128. package/dist/storage/implementation/v3/SourceRecordStoreV3.js +164 -0
  129. package/dist/storage/implementation/v3/SourceRecordStoreV3.js.map +1 -0
  130. package/dist/storage/implementation/v3/VersionedPowerSyncMongoV3.d.ts +21 -0
  131. package/dist/storage/implementation/v3/VersionedPowerSyncMongoV3.js +71 -0
  132. package/dist/storage/implementation/v3/VersionedPowerSyncMongoV3.js.map +1 -0
  133. package/dist/storage/implementation/v3/models.d.ts +43 -0
  134. package/dist/storage/implementation/v3/models.js +34 -0
  135. package/dist/storage/implementation/v3/models.js.map +1 -0
  136. package/dist/storage/storage-index.d.ts +8 -5
  137. package/dist/storage/storage-index.js +8 -5
  138. package/dist/storage/storage-index.js.map +1 -1
  139. package/dist/utils/util.d.ts +11 -4
  140. package/dist/utils/util.js +25 -4
  141. package/dist/utils/util.js.map +1 -1
  142. package/package.json +9 -9
  143. package/src/migrations/db/migrations/1688556755264-initial-sync-rules.ts +1 -1
  144. package/src/migrations/db/migrations/1702295701188-sync-rule-state.ts +7 -7
  145. package/src/migrations/db/migrations/1770213298299-storage-version.ts +1 -1
  146. package/src/storage/MongoBucketStorage.ts +97 -62
  147. package/src/storage/MongoReportStorage.ts +2 -2
  148. package/src/storage/implementation/BucketDefinitionMapping.ts +72 -0
  149. package/src/storage/implementation/MongoBucketBatch.ts +110 -144
  150. package/src/storage/implementation/MongoBucketBatchShared.ts +11 -0
  151. package/src/storage/implementation/MongoChecksums.ts +53 -76
  152. package/src/storage/implementation/MongoCompactor.ts +374 -404
  153. package/src/storage/implementation/MongoParameterCompactor.ts +37 -24
  154. package/src/storage/implementation/MongoPersistedSyncRules.ts +76 -0
  155. package/src/storage/implementation/MongoPersistedSyncRulesContent.ts +18 -1
  156. package/src/storage/implementation/MongoStorageProvider.ts +1 -1
  157. package/src/storage/implementation/MongoSyncBucketStorage.ts +190 -457
  158. package/src/storage/implementation/MongoSyncRulesLock.ts +12 -14
  159. package/src/storage/implementation/MongoWriteCheckpointAPI.ts +4 -2
  160. package/src/storage/implementation/OperationBatch.ts +1 -1
  161. package/src/storage/implementation/common/BucketDataDoc.ts +37 -0
  162. package/src/storage/implementation/common/MongoSyncBucketStorageContext.ts +15 -0
  163. package/src/storage/implementation/common/PersistedBatch.ts +364 -0
  164. package/src/storage/implementation/common/SingleBucketStore.ts +63 -0
  165. package/src/storage/implementation/common/SourceRecordStore.ts +49 -0
  166. package/src/storage/implementation/common/VersionedPowerSyncMongoBase.ts +80 -0
  167. package/src/storage/implementation/createMongoSyncBucketStorage.ts +25 -0
  168. package/src/storage/implementation/db.ts +107 -128
  169. package/src/storage/implementation/models.ts +84 -38
  170. package/src/storage/implementation/v1/MongoBucketBatchV1.ts +32 -0
  171. package/src/storage/implementation/v1/MongoChecksumsV1.ts +75 -0
  172. package/src/storage/implementation/v1/MongoCompactorV1.ts +93 -0
  173. package/src/storage/implementation/v1/MongoParameterCompactorV1.ts +26 -0
  174. package/src/storage/implementation/v1/MongoSyncBucketStorageV1.ts +448 -0
  175. package/src/storage/implementation/v1/PersistedBatchV1.ts +230 -0
  176. package/src/storage/implementation/v1/SingleBucketStoreV1.ts +74 -0
  177. package/src/storage/implementation/v1/SourceRecordStoreV1.ts +156 -0
  178. package/src/storage/implementation/v1/VersionedPowerSyncMongoV1.ts +28 -0
  179. package/src/storage/implementation/v1/models.ts +84 -0
  180. package/src/storage/implementation/v3/MongoBucketBatchV3.ts +44 -0
  181. package/src/storage/implementation/v3/MongoChecksumsV3.ts +120 -0
  182. package/src/storage/implementation/v3/MongoCompactorV3.ts +107 -0
  183. package/src/storage/implementation/v3/MongoParameterCompactorV3.ts +24 -0
  184. package/src/storage/implementation/v3/MongoParameterLookupV3.ts +12 -0
  185. package/src/storage/implementation/v3/MongoSyncBucketStorageV3.ts +550 -0
  186. package/src/storage/implementation/v3/PersistedBatchV3.ts +318 -0
  187. package/src/storage/implementation/v3/SingleBucketStoreV3.ts +68 -0
  188. package/src/storage/implementation/v3/SourceRecordStoreV3.ts +226 -0
  189. package/src/storage/implementation/v3/VersionedPowerSyncMongoV3.ts +112 -0
  190. package/src/storage/implementation/v3/models.ts +96 -0
  191. package/src/storage/storage-index.ts +8 -5
  192. package/src/utils/util.ts +36 -7
  193. package/test/src/__snapshots__/storage_sync.test.ts.snap +282 -0
  194. package/test/src/connection-report-storage.test.ts +3 -3
  195. package/test/src/setup.ts +1 -1
  196. package/test/src/storage.test.ts +2 -2
  197. package/test/src/storage_compacting.test.ts +57 -29
  198. package/test/src/storage_sync.test.ts +351 -5
  199. package/test/tsconfig.json +0 -1
  200. package/tsconfig.tsbuildinfo +1 -1
  201. package/dist/storage/implementation/PersistedBatch.d.ts +0 -71
  202. package/dist/storage/implementation/PersistedBatch.js +0 -354
  203. package/dist/storage/implementation/PersistedBatch.js.map +0 -1
  204. package/src/storage/implementation/PersistedBatch.ts +0 -432
@@ -2,7 +2,8 @@ import * as lib_mongo from '@powersync/lib-service-mongodb';
2
2
  import { mongo } from '@powersync/lib-service-mongodb';
3
3
  import {
4
4
  BaseObserver,
5
- logger,
5
+ DO_NOT_LOG,
6
+ Logger,
6
7
  ReplicationAbortedError,
7
8
  ServiceAssertionError
8
9
  } from '@powersync/lib-services-framework';
@@ -10,48 +11,43 @@ import {
10
11
  BroadcastIterable,
11
12
  CHECKPOINT_INVALIDATE_ALL,
12
13
  CheckpointChanges,
13
- deserializeParameterLookup,
14
14
  GetCheckpointChangesOptions,
15
15
  InternalOpId,
16
- internalToExternalOpId,
17
16
  maxLsn,
18
17
  mergeAsyncIterables,
19
18
  PopulateChecksumCacheOptions,
20
19
  PopulateChecksumCacheResults,
21
- ProtocolOpId,
22
20
  ReplicationCheckpoint,
23
21
  storage,
24
22
  utils,
25
23
  WatchWriteCheckpointOptions
26
24
  } from '@powersync/service-core';
27
- import { JSONBig } from '@powersync/service-jsonbig';
28
- import { HydratedSyncRules, ScopedParameterLookup, SqliteJsonRow } from '@powersync/service-sync-rules';
25
+ import { HydratedSyncRules, ParameterLookupRows, ScopedParameterLookup } from '@powersync/service-sync-rules';
29
26
  import * as bson from 'bson';
30
27
  import { LRUCache } from 'lru-cache';
31
28
  import * as timers from 'timers/promises';
32
- import { idPrefixFilter, mapOpEntry, readSingleBatch, setSessionSnapshotTime } from '../../utils/util.js';
29
+ import { retryOnMongoMaxTimeMSExpired } from '../../utils/util.js';
33
30
  import { MongoBucketStorage } from '../MongoBucketStorage.js';
34
- import { VersionedPowerSyncMongo } from './db.js';
35
- import {
36
- BucketDataDocument,
37
- BucketDataKey,
38
- BucketStateDocument,
39
- SourceKey,
40
- SourceTableDocument,
41
- StorageConfig
42
- } from './models.js';
43
- import { MongoBucketBatch } from './MongoBucketBatch.js';
31
+ import { MongoSyncBucketStorageContext } from './common/MongoSyncBucketStorageContext.js';
32
+ import type { VersionedPowerSyncMongo } from './db.js';
33
+ import { CommonSourceTableDocument, StorageConfig } from './models.js';
34
+ import { MongoBucketBatchOptions } from './MongoBucketBatch.js';
44
35
  import { MongoChecksumOptions, MongoChecksums } from './MongoChecksums.js';
45
- import { MongoCompactor } from './MongoCompactor.js';
36
+ import { MongoCompactOptions, MongoCompactor } from './MongoCompactor.js';
46
37
  import { MongoParameterCompactor } from './MongoParameterCompactor.js';
47
38
  import { MongoPersistedSyncRulesContent } from './MongoPersistedSyncRulesContent.js';
48
39
  import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js';
49
40
 
50
41
  export interface MongoSyncBucketStorageOptions {
51
- checksumOptions?: Omit<MongoChecksumOptions, 'storageConfig'>;
42
+ checksumOptions?: Omit<MongoChecksumOptions, 'storageConfig' | 'mapping'>;
52
43
  storageConfig: StorageConfig;
53
44
  }
54
45
 
46
+ interface InternalCheckpointChanges extends CheckpointChanges {
47
+ updatedWriteCheckpoints: Map<string, bigint>;
48
+ invalidateWriteCheckpoints: boolean;
49
+ }
50
+
55
51
  /**
56
52
  * Only keep checkpoints around for a minute, before fetching a fresh one.
57
53
  *
@@ -63,41 +59,68 @@ export interface MongoSyncBucketStorageOptions {
63
59
  */
64
60
  const CHECKPOINT_TIMEOUT_MS = 60_000;
65
61
 
66
- export class MongoSyncBucketStorage
62
+ export abstract class MongoSyncBucketStorage
67
63
  extends BaseObserver<storage.SyncRulesBucketStorageListener>
68
64
  implements storage.SyncRulesBucketStorage
69
65
  {
70
- private readonly db: VersionedPowerSyncMongo;
66
+ readonly db: VersionedPowerSyncMongo;
67
+ [DO_NOT_LOG] = true;
68
+
71
69
  readonly checksums: MongoChecksums;
72
70
 
73
71
  private parsedSyncRulesCache: { parsed: HydratedSyncRules; options: storage.ParseSyncRulesOptions } | undefined;
74
72
  private writeCheckpointAPI: MongoWriteCheckpointAPI;
73
+ public readonly logger: Logger;
74
+ #storageInitialized = false;
75
75
 
76
76
  constructor(
77
77
  public readonly factory: MongoBucketStorage,
78
78
  public readonly group_id: number,
79
- private readonly sync_rules: MongoPersistedSyncRulesContent,
79
+ protected readonly sync_rules: MongoPersistedSyncRulesContent,
80
80
  public readonly slot_name: string,
81
81
  writeCheckpointMode: storage.WriteCheckpointMode | undefined,
82
82
  options: MongoSyncBucketStorageOptions
83
83
  ) {
84
84
  super();
85
85
  this.db = factory.db.versioned(sync_rules.getStorageConfig());
86
- this.checksums = new MongoChecksums(this.db, this.group_id, {
87
- ...options.checksumOptions,
88
- storageConfig: options?.storageConfig
89
- });
86
+ this.checksums = this.createMongoChecksums(options);
90
87
  this.writeCheckpointAPI = new MongoWriteCheckpointAPI({
91
88
  db: this.db,
92
89
  mode: writeCheckpointMode ?? storage.WriteCheckpointMode.MANAGED,
93
90
  sync_rules_id: group_id
94
91
  });
92
+ this.logger = sync_rules.logger;
95
93
  }
96
94
 
95
+ /**
96
+ * Not for external use - public here for tests only.
97
+ *
98
+ * @internal
99
+ */
100
+ abstract createMongoCompactor(options: MongoCompactOptions): MongoCompactor;
101
+
102
+ protected abstract createMongoChecksums(options: MongoSyncBucketStorageOptions): MongoChecksums;
103
+ protected abstract createMongoParameterCompactor(
104
+ checkpoint: InternalOpId,
105
+ options: storage.CompactOptions
106
+ ): MongoParameterCompactor;
107
+
97
108
  get writeCheckpointMode() {
98
109
  return this.writeCheckpointAPI.writeCheckpointMode;
99
110
  }
100
111
 
112
+ get mapping() {
113
+ return this.sync_rules.mapping;
114
+ }
115
+
116
+ protected get versionContext(): MongoSyncBucketStorageContext {
117
+ return {
118
+ db: this.db,
119
+ group_id: this.group_id,
120
+ mapping: this.mapping
121
+ };
122
+ }
123
+
101
124
  setWriteCheckpointMode(mode: storage.WriteCheckpointMode): void {
102
125
  this.writeCheckpointAPI.setWriteCheckpointMode(mode);
103
126
  }
@@ -115,10 +138,6 @@ export class MongoSyncBucketStorage
115
138
 
116
139
  getParsedSyncRules(options: storage.ParseSyncRulesOptions): HydratedSyncRules {
117
140
  const { parsed, options: cachedOptions } = this.parsedSyncRulesCache ?? {};
118
- /**
119
- * Check if the cached sync rules, if present, had the same options.
120
- * Parse sync rules if the options are different or if there is no cached value.
121
- */
122
141
  if (!parsed || options.defaultSchema != cachedOptions?.defaultSchema) {
123
142
  this.parsedSyncRulesCache = { parsed: this.sync_rules.parsed(options).hydratedSyncRules(), options };
124
143
  }
@@ -140,26 +159,15 @@ export class MongoSyncBucketStorage
140
159
  }
141
160
  );
142
161
  if (!doc?.snapshot_done || !['ACTIVE', 'ERRORED'].includes(doc.state)) {
143
- // Sync rules not active - return null
144
162
  return null;
145
163
  }
146
164
 
147
- // Specifically using operationTime instead of clusterTime
148
- // There are 3 fields in the response:
149
- // 1. operationTime, not exposed for snapshot sessions (used for causal consistency)
150
- // 2. clusterTime (used for connection management)
151
- // 3. atClusterTime, which is session.snapshotTime
152
- // We use atClusterTime, to match the driver's internal snapshot handling.
153
- // There are cases where clusterTime > operationTime and atClusterTime,
154
- // which could cause snapshot queries using this as the snapshotTime to timeout.
155
- // This was specifically observed on MongoDB 6.0 and 7.0.
156
165
  const snapshotTime = (session as any).snapshotTime as bson.Timestamp | undefined;
157
166
  if (snapshotTime == null) {
158
167
  throw new ServiceAssertionError('Missing snapshotTime in getCheckpoint()');
159
168
  }
160
169
  return new MongoReplicationCheckpoint(
161
170
  this,
162
- // null/0n is a valid checkpoint in some cases, for example if the initial snapshot was empty
163
171
  doc.last_checkpoint ?? 0n,
164
172
  doc.last_checkpoint_lsn ?? null,
165
173
  snapshotTime
@@ -167,7 +175,23 @@ export class MongoSyncBucketStorage
167
175
  });
168
176
  }
169
177
 
178
+ protected abstract initializeVersionStorage(): Promise<void>;
179
+
180
+ private async initializeStorage() {
181
+ if (this.#storageInitialized) {
182
+ return;
183
+ }
184
+
185
+ await this.db.initializeStreamStorage(this.group_id);
186
+ await this.initializeVersionStorage();
187
+ this.#storageInitialized = true;
188
+ }
189
+
190
+ protected abstract createWriterImpl(batchOptions: MongoBucketBatchOptions): storage.BucketStorageBatch;
191
+
170
192
  async createWriter(options: storage.CreateWriterOptions): Promise<storage.BucketStorageBatch> {
193
+ await this.initializeStorage();
194
+
171
195
  const doc = await this.db.sync_rules.findOne(
172
196
  {
173
197
  _id: this.group_id
@@ -176,10 +200,11 @@ export class MongoSyncBucketStorage
176
200
  );
177
201
  const checkpoint_lsn = doc?.last_checkpoint_lsn ?? null;
178
202
 
179
- const writer = new MongoBucketBatch({
180
- logger: options.logger,
203
+ const batchOptions: MongoBucketBatchOptions = {
204
+ logger: options.logger ?? this.logger,
181
205
  db: this.db,
182
206
  syncRules: this.sync_rules.parsed(options).hydratedSyncRules(),
207
+ mapping: this.sync_rules.mapping,
183
208
  groupId: this.group_id,
184
209
  slotName: this.slot_name,
185
210
  lastCheckpointLsn: checkpoint_lsn,
@@ -187,15 +212,14 @@ export class MongoSyncBucketStorage
187
212
  keepaliveOp: doc?.keepalive_op ? BigInt(doc.keepalive_op) : null,
188
213
  storeCurrentData: options.storeCurrentData,
189
214
  skipExistingRows: options.skipExistingRows ?? false,
190
- markRecordUnavailable: options.markRecordUnavailable
191
- });
215
+ markRecordUnavailable: options.markRecordUnavailable,
216
+ tracer: options.tracer
217
+ };
218
+ const writer = this.createWriterImpl(batchOptions);
192
219
  this.iterateListeners((cb) => cb.batchStarted?.(writer));
193
220
  return writer;
194
221
  }
195
222
 
196
- /**
197
- * @deprecated Use `createWriter()` with `await using` instead.
198
- */
199
223
  async startBatch(
200
224
  options: storage.CreateWriterOptions,
201
225
  callback: (batch: storage.BucketStorageBatch) => Promise<void>
@@ -206,6 +230,16 @@ export class MongoSyncBucketStorage
206
230
  return writer.last_flushed_op != null ? { flushed_op: writer.last_flushed_op } : null;
207
231
  }
208
232
 
233
+ protected abstract sourceTableBaseId(): Partial<CommonSourceTableDocument>;
234
+
235
+ protected abstract augmentCreatedSourceTableDocument(
236
+ createDoc: CommonSourceTableDocument,
237
+ options: storage.ResolveTableOptions,
238
+ candidateSourceTable: storage.SourceTable
239
+ ): void;
240
+
241
+ protected abstract initializeResolvedSourceRecords(sourceTableId: bson.ObjectId): Promise<void>;
242
+
209
243
  async resolveTable(options: storage.ResolveTableOptions): Promise<storage.ResolveTableResult> {
210
244
  const { group_id, connection_id, connection_tag, entity_descriptor } = options;
211
245
 
@@ -217,23 +251,36 @@ export class MongoSyncBucketStorage
217
251
  type_oid: column.typeId
218
252
  }));
219
253
  let result: storage.ResolveTableResult | null = null;
254
+ let initializeSourceRecordsFor: bson.ObjectId | null = null;
255
+
256
+ const baseId = this.sourceTableBaseId();
220
257
  await this.db.client.withSession(async (session) => {
221
- const col = this.db.source_tables;
222
- let filter: Partial<SourceTableDocument> = {
223
- group_id: group_id,
258
+ const col = this.db.commonSourceTables(group_id);
259
+ let filter: Partial<CommonSourceTableDocument> = {
260
+ ...baseId,
224
261
  connection_id: connection_id,
225
262
  schema_name: schema,
226
263
  table_name: name,
227
264
  replica_id_columns2: normalizedReplicaIdColumns
228
265
  };
266
+
229
267
  if (objectId != null) {
230
268
  filter.relation_id = objectId;
231
269
  }
232
270
  let doc = await col.findOne(filter, { session });
233
271
  if (doc == null) {
234
- doc = {
235
- _id: new bson.ObjectId(),
236
- group_id: group_id,
272
+ const candidateSourceTable = new storage.SourceTable({
273
+ id: new bson.ObjectId(),
274
+ connectionTag: connection_tag,
275
+ objectId: objectId,
276
+ schema: schema,
277
+ name: name,
278
+ replicaIdColumns: replicaIdColumns,
279
+ snapshotComplete: false
280
+ });
281
+ const createDoc: CommonSourceTableDocument = {
282
+ _id: candidateSourceTable.id as bson.ObjectId,
283
+ ...(baseId as any),
237
284
  connection_id: connection_id,
238
285
  relation_id: objectId,
239
286
  schema_name: schema,
@@ -243,8 +290,11 @@ export class MongoSyncBucketStorage
243
290
  snapshot_done: false,
244
291
  snapshot_status: undefined
245
292
  };
293
+ this.augmentCreatedSourceTableDocument(createDoc, options, candidateSourceTable);
294
+ doc = createDoc;
246
295
 
247
296
  await col.insertOne(doc, { session });
297
+ initializeSourceRecordsFor = doc._id;
248
298
  }
249
299
  const sourceTable = new storage.SourceTable({
250
300
  id: doc._id,
@@ -268,16 +318,14 @@ export class MongoSyncBucketStorage
268
318
  };
269
319
 
270
320
  let dropTables: storage.SourceTable[] = [];
271
- // Detect tables that are either renamed, or have different replica_id_columns
272
321
  let truncateFilter = [{ schema_name: schema, table_name: name }] as any[];
273
322
  if (objectId != null) {
274
- // Only detect renames if the source uses relation ids.
275
323
  truncateFilter.push({ relation_id: objectId });
276
324
  }
277
325
  const truncate = await col
278
326
  .find(
279
327
  {
280
- group_id: group_id,
328
+ ...baseId,
281
329
  connection_id: connection_id,
282
330
  _id: { $ne: doc._id },
283
331
  $or: truncateFilter
@@ -304,223 +352,38 @@ export class MongoSyncBucketStorage
304
352
  dropTables: dropTables
305
353
  };
306
354
  });
355
+ if (initializeSourceRecordsFor != null) {
356
+ await this.initializeResolvedSourceRecords(initializeSourceRecordsFor);
357
+ }
307
358
  return result!;
308
359
  }
309
360
 
361
+ protected abstract getParameterSetsImpl(
362
+ checkpoint: MongoReplicationCheckpoint,
363
+ lookups: ScopedParameterLookup[],
364
+ limit: number
365
+ ): Promise<ParameterLookupRows[]>;
366
+
310
367
  async getParameterSets(
311
368
  checkpoint: MongoReplicationCheckpoint,
312
- lookups: ScopedParameterLookup[]
313
- ): Promise<SqliteJsonRow[]> {
314
- return this.db.client.withSession({ snapshot: true }, async (session) => {
315
- // Set the session's snapshot time to the checkpoint's snapshot time.
316
- // An alternative would be to create the session when the checkpoint is created, but managing
317
- // the session lifetime would become more complex.
318
- // Starting and ending sessions are cheap (synchronous when no transactions are used),
319
- // so this should be fine.
320
- // This is a roundabout way of setting {readConcern: {atClusterTime: clusterTime}}, since
321
- // that is not exposed directly by the driver.
322
- // Future versions of the driver may change the snapshotTime behavior, so we need tests to
323
- // validate that this works as expected. We test this in the compacting tests.
324
- setSessionSnapshotTime(session, checkpoint.snapshotTime);
325
- const lookupFilter = lookups.map((lookup) => {
326
- return storage.serializeLookup(lookup);
327
- });
328
- // This query does not use indexes super efficiently, apart from the lookup filter.
329
- // From some experimentation I could do individual lookups more efficient using an index
330
- // on {'key.g': 1, lookup: 1, 'key.t': 1, 'key.k': 1, _id: -1},
331
- // but could not do the same using $group.
332
- // For now, just rely on compacting to remove extraneous data.
333
- // For a description of the data format, see the `/docs/parameters-lookups.md` file.
334
- const rows = await this.db.bucket_parameters
335
- .aggregate(
336
- [
337
- {
338
- $match: {
339
- 'key.g': this.group_id,
340
- lookup: { $in: lookupFilter },
341
- _id: { $lte: checkpoint.checkpoint }
342
- }
343
- },
344
- {
345
- $sort: {
346
- _id: -1
347
- }
348
- },
349
- {
350
- $group: {
351
- _id: { key: '$key', lookup: '$lookup' },
352
- bucket_parameters: {
353
- $first: '$bucket_parameters'
354
- }
355
- }
356
- }
357
- ],
358
- {
359
- session,
360
- readConcern: 'snapshot',
361
- // Limit the time for the operation to complete, to avoid getting connection timeouts
362
- maxTimeMS: lib_mongo.db.MONGO_OPERATION_TIMEOUT_MS
363
- }
364
- )
365
- .toArray()
366
- .catch((e) => {
367
- throw lib_mongo.mapQueryError(e, 'while evaluating parameter queries');
368
- });
369
- const groupedParameters = rows.map((row) => {
370
- return row.bucket_parameters;
371
- });
372
- return groupedParameters.flat();
373
- });
369
+ lookups: ScopedParameterLookup[],
370
+ limit: number
371
+ ): Promise<ParameterLookupRows[]> {
372
+ return this.getParameterSetsImpl(checkpoint, lookups, limit);
374
373
  }
375
374
 
375
+ protected abstract getBucketDataBatchImpl(
376
+ checkpoint: utils.InternalOpId,
377
+ dataBuckets: storage.BucketDataRequest[],
378
+ options?: storage.BucketDataBatchOptions
379
+ ): AsyncIterable<storage.SyncBucketDataChunk>;
380
+
376
381
  async *getBucketDataBatch(
377
382
  checkpoint: utils.InternalOpId,
378
383
  dataBuckets: storage.BucketDataRequest[],
379
384
  options?: storage.BucketDataBatchOptions
380
385
  ): AsyncIterable<storage.SyncBucketDataChunk> {
381
- if (dataBuckets.length == 0) {
382
- return;
383
- }
384
- let filters: mongo.Filter<BucketDataDocument>[] = [];
385
- const bucketMap = new Map(dataBuckets.map((request) => [request.bucket, request.start]));
386
-
387
- if (checkpoint == null) {
388
- throw new ServiceAssertionError('checkpoint is null');
389
- }
390
- const end = checkpoint;
391
- for (let { bucket: name, start } of dataBuckets) {
392
- filters.push({
393
- _id: {
394
- $gt: {
395
- g: this.group_id,
396
- b: name,
397
- o: start
398
- },
399
- $lte: {
400
- g: this.group_id,
401
- b: name,
402
- o: end as any
403
- }
404
- }
405
- });
406
- }
407
-
408
- // Internal naming:
409
- // We do a query for one "batch", which may consist of multiple "chunks".
410
- // Each chunk is limited to single bucket, and is limited in length and size.
411
- // There are also overall batch length and size limits.
412
-
413
- const batchLimit = options?.limit ?? storage.DEFAULT_DOCUMENT_BATCH_LIMIT;
414
- const chunkSizeLimitBytes = options?.chunkLimitBytes ?? storage.DEFAULT_DOCUMENT_CHUNK_LIMIT_BYTES;
415
-
416
- const cursor = this.db.bucket_data.find(
417
- {
418
- $or: filters
419
- },
420
- {
421
- session: undefined,
422
- sort: { _id: 1 },
423
- limit: batchLimit,
424
- // Increase batch size above the default 101, so that we can fill an entire batch in
425
- // one go.
426
- // batchSize is 1 more than limit to auto-close the cursor.
427
- // See https://github.com/mongodb/node-mongodb-native/pull/4580
428
- batchSize: batchLimit + 1,
429
- // Raw mode is returns an array of Buffer instead of parsed documents.
430
- // We use it so that:
431
- // 1. We can calculate the document size accurately without serializing again.
432
- // 2. We can delay parsing the results until it's needed.
433
- // We manually use bson.deserialize below
434
- raw: true,
435
-
436
- // Limit the time for the operation to complete, to avoid getting connection timeouts
437
- maxTimeMS: lib_mongo.db.MONGO_OPERATION_TIMEOUT_MS
438
- }
439
- ) as unknown as mongo.FindCursor<Buffer>;
440
-
441
- // We want to limit results to a single batch to avoid high memory usage.
442
- // This approach uses MongoDB's batch limits to limit the data here, which limits
443
- // to the lower of the batch count and size limits.
444
- // This is similar to using `singleBatch: true` in the find options, but allows
445
- // detecting "hasMore".
446
- let { data, hasMore: batchHasMore } = await readSingleBatch(cursor).catch((e) => {
447
- throw lib_mongo.mapQueryError(e, 'while reading bucket data');
448
- });
449
- if (data.length == batchLimit) {
450
- // Limit reached - could have more data, despite the cursor being drained.
451
- batchHasMore = true;
452
- }
453
-
454
- let chunkSizeBytes = 0;
455
- let currentChunk: utils.SyncBucketData | null = null;
456
- let targetOp: InternalOpId | null = null;
457
-
458
- // Ordered by _id, meaning buckets are grouped together
459
- for (let rawData of data) {
460
- const row = bson.deserialize(rawData, storage.BSON_DESERIALIZE_INTERNAL_OPTIONS) as BucketDataDocument;
461
- const bucket = row._id.b;
462
-
463
- if (currentChunk == null || currentChunk.bucket != bucket || chunkSizeBytes >= chunkSizeLimitBytes) {
464
- // We need to start a new chunk
465
- let start: ProtocolOpId | undefined = undefined;
466
- if (currentChunk != null) {
467
- // There is an existing chunk we need to yield
468
- if (currentChunk.bucket == bucket) {
469
- // Current and new chunk have the same bucket, so need has_more on the current one.
470
- // If currentChunk.bucket != bucket, then we reached the end of the previous bucket,
471
- // and has_more = false in that case.
472
- currentChunk.has_more = true;
473
- start = currentChunk.next_after;
474
- }
475
-
476
- const yieldChunk = currentChunk;
477
- currentChunk = null;
478
- chunkSizeBytes = 0;
479
- yield { chunkData: yieldChunk, targetOp: targetOp };
480
- targetOp = null;
481
- }
482
-
483
- if (start == null) {
484
- const startOpId = bucketMap.get(bucket);
485
- if (startOpId == null) {
486
- throw new ServiceAssertionError(`data for unexpected bucket: ${bucket}`);
487
- }
488
- start = internalToExternalOpId(startOpId);
489
- }
490
- currentChunk = {
491
- bucket,
492
- after: start,
493
- has_more: false,
494
- data: [],
495
- next_after: start
496
- };
497
- targetOp = null;
498
- }
499
-
500
- const entry = mapOpEntry(row);
501
-
502
- if (row.target_op != null) {
503
- // MOVE, CLEAR
504
- if (targetOp == null || row.target_op > targetOp) {
505
- targetOp = row.target_op;
506
- }
507
- }
508
-
509
- currentChunk.data.push(entry);
510
- currentChunk.next_after = entry.op_id;
511
-
512
- chunkSizeBytes += rawData.byteLength;
513
- }
514
-
515
- if (currentChunk != null) {
516
- const yieldChunk = currentChunk;
517
- currentChunk = null;
518
- // This is the final chunk in the batch.
519
- // There may be more data if and only if the batch we retrieved isn't complete.
520
- yieldChunk.has_more = batchHasMore;
521
- yield { chunkData: yieldChunk, targetOp: targetOp };
522
- targetOp = null;
523
- }
386
+ yield* this.getBucketDataBatchImpl(checkpoint, dataBuckets, options);
524
387
  }
525
388
 
526
389
  async getChecksums(
@@ -535,7 +398,6 @@ export class MongoSyncBucketStorage
535
398
  }
536
399
 
537
400
  async terminate(options?: storage.TerminateOptions) {
538
- // Default is to clear the storage except when explicitly requested not to.
539
401
  if (!options || options?.clearStorage) {
540
402
  await this.clear(options);
541
403
  }
@@ -569,7 +431,7 @@ export class MongoSyncBucketStorage
569
431
  }
570
432
  );
571
433
  if (doc == null) {
572
- throw new ServiceAssertionError('Cannot find sync rules status');
434
+ throw new ServiceAssertionError('Cannot find replication stream status');
573
435
  }
574
436
 
575
437
  return {
@@ -580,32 +442,22 @@ export class MongoSyncBucketStorage
580
442
  };
581
443
  }
582
444
 
445
+ protected abstract clearBucketData(signal?: AbortSignal): Promise<void>;
446
+
447
+ protected abstract clearParameterIndexes(signal?: AbortSignal): Promise<void>;
448
+
449
+ protected abstract clearSourceRecords(signal?: AbortSignal): Promise<void>;
450
+
451
+ protected abstract clearBucketState(signal?: AbortSignal): Promise<void>;
452
+
453
+ protected abstract clearSourceTables(signal?: AbortSignal): Promise<void>;
454
+
583
455
  async clear(options?: storage.ClearStorageOptions): Promise<void> {
584
- while (true) {
585
- if (options?.signal?.aborted) {
586
- throw new ReplicationAbortedError('Aborted clearing data', options.signal.reason);
587
- }
588
- try {
589
- await this.clearIteration();
456
+ const signal = options?.signal;
590
457
 
591
- logger.info(`${this.slot_name} Done clearing data`);
592
- return;
593
- } catch (e: unknown) {
594
- if (lib_mongo.isMongoServerError(e) && e.codeName == 'MaxTimeMSExpired') {
595
- logger.info(
596
- `${this.slot_name} Cleared batch of data in ${lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS}ms, continuing...`
597
- );
598
- await timers.setTimeout(lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS / 5);
599
- } else {
600
- throw e;
601
- }
602
- }
458
+ if (signal?.aborted) {
459
+ throw new ReplicationAbortedError('Aborted clearing data', signal.reason);
603
460
  }
604
- }
605
-
606
- private async clearIteration(): Promise<void> {
607
- // Individual operations here may time out with the maxTimeMS option.
608
- // It is expected to still make progress, and continue on the next try.
609
461
 
610
462
  await this.db.sync_rules.updateOne(
611
463
  {
@@ -625,39 +477,31 @@ export class MongoSyncBucketStorage
625
477
  },
626
478
  { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
627
479
  );
628
- await this.db.bucket_data.deleteMany(
629
- {
630
- _id: idPrefixFilter<BucketDataKey>({ g: this.group_id }, ['b', 'o'])
631
- },
632
- { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
633
- );
634
- await this.db.bucket_parameters.deleteMany(
635
- {
636
- 'key.g': this.group_id
637
- },
638
- { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
639
- );
640
-
641
- await this.db.common_current_data.deleteMany(
642
- {
643
- _id: idPrefixFilter<SourceKey>({ g: this.group_id }, ['t', 'k'])
644
- },
645
- { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
646
- );
647
-
648
- await this.db.bucket_state.deleteMany(
649
- {
650
- _id: idPrefixFilter<BucketStateDocument['_id']>({ g: this.group_id }, ['b'])
651
- },
652
- { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
653
- );
654
480
 
655
- await this.db.source_tables.deleteMany(
656
- {
657
- group_id: this.group_id
658
- },
659
- { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
660
- );
481
+ await this.clearBucketData(signal);
482
+ await this.clearParameterIndexes(signal);
483
+ await this.clearSourceRecords(signal);
484
+ await this.clearBucketState(signal);
485
+ await this.clearSourceTables(signal);
486
+
487
+ this.#storageInitialized = false;
488
+ }
489
+
490
+ protected async clearDeleteMany(
491
+ label: string,
492
+ operation: () => Promise<mongo.DeleteResult>,
493
+ signal?: AbortSignal
494
+ ): Promise<void> {
495
+ await retryOnMongoMaxTimeMSExpired(operation, {
496
+ signal,
497
+ abortMessage: 'Aborted clearing data',
498
+ retryDelayMs: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS / 5,
499
+ onRetry: () => {
500
+ this.logger.info(
501
+ `Cleared batch of ${label} in ${lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS}ms, continuing...`
502
+ );
503
+ }
504
+ });
661
505
  }
662
506
 
663
507
  async reportError(e: any): Promise<void> {
@@ -681,100 +525,68 @@ export class MongoSyncBucketStorage
681
525
  const checkpoint = await this.getCheckpointInternal();
682
526
  maxOpId = checkpoint?.checkpoint ?? undefined;
683
527
  }
684
- await new MongoCompactor(this, this.db, { ...options, maxOpId }).compact();
528
+ await this.createMongoCompactor({ ...options, maxOpId, logger: this.logger }).compact();
685
529
 
686
530
  if (maxOpId != null && options?.compactParameterData) {
687
- await new MongoParameterCompactor(this.db, this.group_id, maxOpId, options).compact();
531
+ await this.createMongoParameterCompactor(maxOpId, options).compact();
688
532
  }
689
533
  }
690
534
 
691
535
  async populatePersistentChecksumCache(options: PopulateChecksumCacheOptions): Promise<PopulateChecksumCacheResults> {
692
- logger.info(`Populating persistent checksum cache...`);
536
+ this.logger.info(`Populating persistent checksum cache...`);
693
537
  const start = Date.now();
694
- // We do a minimal compact here.
695
- // We can optimize this in the future.
696
- const compactor = new MongoCompactor(this, this.db, {
538
+ const compactor = this.createMongoCompactor({
697
539
  ...options,
698
- // Don't track updates for MOVE compacting
699
- memoryLimitMB: 0
540
+ memoryLimitMB: 0,
541
+ logger: this.logger
700
542
  });
701
543
 
702
544
  const result = await compactor.populateChecksums({
703
- // There are cases with millions of small buckets, in which case it can take very long to
704
- // populate the checksums, with minimal benefit. We skip the small buckets here.
705
545
  minBucketChanges: options.minBucketChanges ?? 10
706
546
  });
707
547
  const duration = Date.now() - start;
708
- logger.info(`Populated persistent checksum cache in ${(duration / 1000).toFixed(1)}s`);
548
+ this.logger.info(`Populated persistent checksum cache in ${(duration / 1000).toFixed(1)}s`);
709
549
  return result;
710
550
  }
711
551
 
712
- /**
713
- * Instance-wide watch on the latest available checkpoint (op_id + lsn).
714
- */
715
552
  private async *watchActiveCheckpoint(signal: AbortSignal): AsyncIterable<ReplicationCheckpoint> {
716
553
  if (signal.aborted) {
717
554
  return;
718
555
  }
719
556
 
720
- // If the stream is idle, we wait a max of a minute (CHECKPOINT_TIMEOUT_MS) before we get another checkpoint,
721
- // to avoid stale checkpoint snapshots. This is what checkpointTimeoutStream() is for.
722
- // Essentially, even if there are no actual checkpoint changes, we want a new snapshotTime every minute or so,
723
- // to ensure that any new clients connecting will get a valid snapshotTime.
724
557
  const stream = mergeAsyncIterables(
725
558
  [this.checkpointChangesStream(signal), this.checkpointTimeoutStream(signal)],
726
559
  signal
727
560
  );
728
561
 
729
- // We only watch changes to the active sync rules.
730
- // If it changes to inactive, we abort and restart with the new sync rules.
731
562
  for await (const _ of stream) {
732
563
  if (signal.aborted) {
733
- // Would likely have been caught by the signal on the timeout or the upstream stream, but we check here anyway
734
564
  break;
735
565
  }
736
566
 
737
567
  const op = await this.getCheckpointInternal();
738
568
  if (op == null) {
739
- // Sync rules have changed - abort and restart.
740
- // We do a soft close of the stream here - no error
741
569
  break;
742
570
  }
743
571
 
744
- // Previously, we only yielded when the checkpoint or lsn changed.
745
- // However, we always want to use the latest snapshotTime, so we skip that filtering here.
746
- // That filtering could be added in the per-user streams if needed, but in general the capped collection
747
- // should already only contain useful changes in most cases.
748
572
  yield op;
749
573
  }
750
574
  }
751
575
 
752
- // Nothing is done here until a subscriber starts to iterate
753
576
  private readonly sharedIter = new BroadcastIterable((signal) => {
754
577
  return this.watchActiveCheckpoint(signal);
755
578
  });
756
579
 
757
- /**
758
- * User-specific watch on the latest checkpoint and/or write checkpoint.
759
- */
760
580
  async *watchCheckpointChanges(options: WatchWriteCheckpointOptions): AsyncIterable<storage.StorageCheckpointUpdate> {
761
581
  let lastCheckpoint: ReplicationCheckpoint | null = null;
762
582
 
763
583
  const iter = this.sharedIter[Symbol.asyncIterator](options.signal);
764
584
 
765
585
  let writeCheckpoint: bigint | null = null;
766
- // true if we queried the initial write checkpoint, even if it doesn't exist
767
586
  let queriedInitialWriteCheckpoint = false;
768
587
 
769
588
  for await (const nextCheckpoint of iter) {
770
- // lsn changes are not important by itself.
771
- // What is important is:
772
- // 1. checkpoint (op_id) changes.
773
- // 2. write checkpoint changes for the specific user
774
-
775
589
  if (nextCheckpoint.lsn != null && !queriedInitialWriteCheckpoint) {
776
- // Lookup the first write checkpoint for the user when we can.
777
- // There will not actually be one in all cases.
778
590
  writeCheckpoint = await this.writeCheckpointAPI.lastWriteCheckpoint({
779
591
  sync_rules_id: this.group_id,
780
592
  user_id: options.user_id,
@@ -790,15 +602,11 @@ export class MongoSyncBucketStorage
790
602
  lastCheckpoint.checkpoint == nextCheckpoint.checkpoint &&
791
603
  lastCheckpoint.lsn == nextCheckpoint.lsn
792
604
  ) {
793
- // No change - wait for next one
794
- // In some cases, many LSNs may be produced in a short time.
795
- // Add a delay to throttle the loop a bit.
796
605
  await timers.setTimeout(20 + 10 * Math.random());
797
606
  continue;
798
607
  }
799
608
 
800
609
  if (lastCheckpoint == null) {
801
- // First message for this stream - "INVALIDATE_ALL" means it will lookup all data
802
610
  yield {
803
611
  base: nextCheckpoint,
804
612
  writeCheckpoint,
@@ -812,8 +620,6 @@ export class MongoSyncBucketStorage
812
620
 
813
621
  let updatedWriteCheckpoint = updates.updatedWriteCheckpoints.get(options.user_id) ?? null;
814
622
  if (updates.invalidateWriteCheckpoints) {
815
- // Invalidated means there were too many updates to track the individual ones,
816
- // so we switch to "polling" (querying directly in each stream).
817
623
  updatedWriteCheckpoint = await this.writeCheckpointAPI.lastWriteCheckpoint({
818
624
  sync_rules_id: this.group_id,
819
625
  user_id: options.user_id,
@@ -824,8 +630,6 @@ export class MongoSyncBucketStorage
824
630
  }
825
631
  if (updatedWriteCheckpoint != null && (writeCheckpoint == null || updatedWriteCheckpoint > writeCheckpoint)) {
826
632
  writeCheckpoint = updatedWriteCheckpoint;
827
- // If it happened that we haven't queried a write checkpoint at this point,
828
- // then we don't need to anymore, since we got an updated one.
829
633
  queriedInitialWriteCheckpoint = true;
830
634
  }
831
635
 
@@ -845,12 +649,6 @@ export class MongoSyncBucketStorage
845
649
  }
846
650
  }
847
651
 
848
- /**
849
- * This watches the checkpoint_events capped collection for new documents inserted,
850
- * and yields whenever one or more documents are inserted.
851
- *
852
- * The actual checkpoint must be queried on the sync_rules collection after this.
853
- */
854
652
  private async *checkpointChangesStream(signal: AbortSignal): AsyncGenerator<void> {
855
653
  if (signal.aborted) {
856
654
  return;
@@ -869,17 +667,13 @@ export class MongoSyncBucketStorage
869
667
  cursor.close().catch(() => {});
870
668
  });
871
669
 
872
- // Yield once on start, regardless of whether there are documents in the cursor.
873
- // This is to ensure that the first iteration of the generator yields immediately.
874
670
  yield;
875
671
 
876
672
  try {
877
673
  while (!signal.aborted) {
878
674
  const doc = await cursor.tryNext().catch((e) => {
879
675
  if (lib_mongo.isMongoServerError(e) && e.codeName === 'CappedPositionLost') {
880
- // Cursor position lost, potentially due to a high rate of notifications
881
676
  cursor = query();
882
- // Treat as an event found, before querying the new cursor again
883
677
  return {};
884
678
  } else {
885
679
  return Promise.reject(e);
@@ -888,8 +682,6 @@ export class MongoSyncBucketStorage
888
682
  if (cursor.closed) {
889
683
  return;
890
684
  }
891
- // Skip buffered documents, if any. We don't care about the contents,
892
- // we only want to know when new documents are inserted.
893
685
  cursor.readBufferedDocuments();
894
686
  if (doc != null) {
895
687
  yield;
@@ -911,7 +703,6 @@ export class MongoSyncBucketStorage
911
703
  await timers.setTimeout(CHECKPOINT_TIMEOUT_MS, undefined, { signal });
912
704
  } catch (e) {
913
705
  if (e.name == 'AbortError') {
914
- // This is how we typically abort this stream, when all listeners are done
915
706
  return;
916
707
  }
917
708
  throw e;
@@ -923,94 +714,37 @@ export class MongoSyncBucketStorage
923
714
  }
924
715
  }
925
716
 
717
+ protected abstract getDataBucketChangesImpl(
718
+ options: GetCheckpointChangesOptions
719
+ ): Promise<Pick<CheckpointChanges, 'updatedDataBuckets' | 'invalidateDataBuckets'>>;
720
+
926
721
  private async getDataBucketChanges(
927
722
  options: GetCheckpointChangesOptions
928
723
  ): Promise<Pick<CheckpointChanges, 'updatedDataBuckets' | 'invalidateDataBuckets'>> {
929
- const limit = 1000;
930
- const bucketStateUpdates = await this.db.bucket_state
931
- .find(
932
- {
933
- // We have an index on (_id.g, last_op).
934
- '_id.g': this.group_id,
935
- last_op: { $gt: options.lastCheckpoint.checkpoint }
936
- },
937
- {
938
- projection: {
939
- '_id.b': 1
940
- },
941
- limit: limit + 1,
942
- // batchSize is 1 more than limit to auto-close the cursor.
943
- // See https://github.com/mongodb/node-mongodb-native/pull/4580
944
- batchSize: limit + 2,
945
- singleBatch: true
946
- }
947
- )
948
- .toArray();
949
-
950
- const buckets = bucketStateUpdates.map((doc) => doc._id.b);
951
- const invalidateDataBuckets = buckets.length > limit;
952
-
953
- return {
954
- invalidateDataBuckets: invalidateDataBuckets,
955
- updatedDataBuckets: invalidateDataBuckets ? new Set<string>() : new Set(buckets)
956
- };
724
+ return this.getDataBucketChangesImpl(options);
957
725
  }
958
726
 
727
+ protected abstract getParameterBucketChangesImpl(
728
+ options: GetCheckpointChangesOptions
729
+ ): Promise<Pick<CheckpointChanges, 'updatedParameterLookups' | 'invalidateParameterBuckets'>>;
730
+
959
731
  private async getParameterBucketChanges(
960
732
  options: GetCheckpointChangesOptions
961
733
  ): Promise<Pick<CheckpointChanges, 'updatedParameterLookups' | 'invalidateParameterBuckets'>> {
962
- const limit = 1000;
963
- const parameterUpdates = await this.db.bucket_parameters
964
- .find(
965
- {
966
- _id: { $gt: options.lastCheckpoint.checkpoint, $lte: options.nextCheckpoint.checkpoint },
967
- 'key.g': this.group_id
968
- },
969
- {
970
- projection: {
971
- lookup: 1
972
- },
973
- limit: limit + 1,
974
- // batchSize is 1 more than limit to auto-close the cursor.
975
- // See https://github.com/mongodb/node-mongodb-native/pull/4580
976
- batchSize: limit + 2,
977
- singleBatch: true
978
- }
979
- )
980
- .toArray();
981
- const invalidateParameterUpdates = parameterUpdates.length > limit;
982
-
983
- return {
984
- invalidateParameterBuckets: invalidateParameterUpdates,
985
- updatedParameterLookups: invalidateParameterUpdates
986
- ? new Set<string>()
987
- : new Set<string>(parameterUpdates.map((p) => JSONBig.stringify(deserializeParameterLookup(p.lookup))))
988
- };
734
+ return this.getParameterBucketChangesImpl(options);
989
735
  }
990
736
 
991
- // If we processed all connections together for each checkpoint, we could do a single lookup for all connections.
992
- // In practice, specific connections may fall behind. So instead, we just cache the results of each specific lookup.
993
- // TODO (later):
994
- // We can optimize this by implementing it like ChecksumCache: We can use partial cache results to do
995
- // more efficient lookups in some cases.
996
737
  private checkpointChangesCache = new LRUCache<
997
738
  string,
998
739
  InternalCheckpointChanges,
999
740
  { options: GetCheckpointChangesOptions }
1000
741
  >({
1001
- // Limit to 50 cache entries, or 10MB, whichever comes first.
1002
- // Some rough calculations:
1003
- // If we process 10 checkpoints per second, and a connection may be 2 seconds behind, we could have
1004
- // up to 20 relevant checkpoints. That gives us 20*20 = 400 potentially-relevant cache entries.
1005
- // That is a worst-case scenario, so we don't actually store that many. In real life, the cache keys
1006
- // would likely be clustered around a few values, rather than spread over all 400 potential values.
1007
742
  max: 50,
1008
743
  maxSize: 12 * 1024 * 1024,
1009
744
  sizeCalculation: (value: InternalCheckpointChanges) => {
1010
- // Estimate of memory usage
1011
745
  const paramSize = [...value.updatedParameterLookups].reduce<number>((a, b) => a + b.length, 0);
1012
746
  const bucketSize = [...value.updatedDataBuckets].reduce<number>((a, b) => a + b.length, 0);
1013
- const writeCheckpointSize = value.updatedWriteCheckpoints.size * 30; // estiamte for user_id + bigint
747
+ const writeCheckpointSize = value.updatedWriteCheckpoints.size * 30;
1014
748
  return 100 + paramSize + bucketSize + writeCheckpointSize;
1015
749
  },
1016
750
  fetchMethod: async (_key, _staleValue, options) => {
@@ -1037,21 +771,20 @@ export class MongoSyncBucketStorage
1037
771
  }
1038
772
  }
1039
773
 
1040
- interface InternalCheckpointChanges extends CheckpointChanges {
1041
- updatedWriteCheckpoints: Map<string, bigint>;
1042
- invalidateWriteCheckpoints: boolean;
1043
- }
1044
-
1045
774
  class MongoReplicationCheckpoint implements ReplicationCheckpoint {
775
+ #storage: MongoSyncBucketStorage;
776
+
1046
777
  constructor(
1047
- private storage: MongoSyncBucketStorage,
778
+ storage: MongoSyncBucketStorage,
1048
779
  public readonly checkpoint: InternalOpId,
1049
780
  public readonly lsn: string | null,
1050
781
  public snapshotTime: mongo.Timestamp
1051
- ) {}
782
+ ) {
783
+ this.#storage = storage;
784
+ }
1052
785
 
1053
- async getParameterSets(lookups: ScopedParameterLookup[]): Promise<SqliteJsonRow[]> {
1054
- return this.storage.getParameterSets(this, lookups);
786
+ async getParameterSets(lookups: ScopedParameterLookup[], limit: number): Promise<ParameterLookupRows[]> {
787
+ return this.#storage.getParameterSets(this, lookups, limit);
1055
788
  }
1056
789
  }
1057
790
 
@@ -1059,7 +792,7 @@ class EmptyReplicationCheckpoint implements ReplicationCheckpoint {
1059
792
  readonly checkpoint: InternalOpId = 0n;
1060
793
  readonly lsn: string | null = null;
1061
794
 
1062
- async getParameterSets(lookups: ScopedParameterLookup[]): Promise<SqliteJsonRow[]> {
795
+ async getParameterSets(_lookups: ScopedParameterLookup[]): Promise<ParameterLookupRows[]> {
1063
796
  return [];
1064
797
  }
1065
798
  }