@powersync/service-module-mongodb-storage 0.15.4 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. package/CHANGELOG.md +35 -0
  2. package/dist/migrations/db/migrations/1688556755264-initial-sync-rules.js +1 -1
  3. package/dist/migrations/db/migrations/1688556755264-initial-sync-rules.js.map +1 -1
  4. package/dist/migrations/db/migrations/1702295701188-sync-rule-state.js +2 -2
  5. package/dist/migrations/db/migrations/1702295701188-sync-rule-state.js.map +1 -1
  6. package/dist/storage/MongoBucketStorage.d.ts +2 -2
  7. package/dist/storage/MongoBucketStorage.js +47 -34
  8. package/dist/storage/MongoBucketStorage.js.map +1 -1
  9. package/dist/storage/implementation/BucketDefinitionMapping.d.ts +17 -0
  10. package/dist/storage/implementation/BucketDefinitionMapping.js +58 -0
  11. package/dist/storage/implementation/BucketDefinitionMapping.js.map +1 -0
  12. package/dist/storage/implementation/MongoBucketBatch.d.ts +16 -14
  13. package/dist/storage/implementation/MongoBucketBatch.js +80 -115
  14. package/dist/storage/implementation/MongoBucketBatch.js.map +1 -1
  15. package/dist/storage/implementation/MongoBucketBatchShared.d.ts +5 -0
  16. package/dist/storage/implementation/MongoBucketBatchShared.js +8 -0
  17. package/dist/storage/implementation/MongoBucketBatchShared.js.map +1 -0
  18. package/dist/storage/implementation/MongoChecksums.d.ts +28 -17
  19. package/dist/storage/implementation/MongoChecksums.js +13 -72
  20. package/dist/storage/implementation/MongoChecksums.js.map +1 -1
  21. package/dist/storage/implementation/MongoCompactor.d.ts +98 -58
  22. package/dist/storage/implementation/MongoCompactor.js +229 -296
  23. package/dist/storage/implementation/MongoCompactor.js.map +1 -1
  24. package/dist/storage/implementation/MongoParameterCompactor.d.ts +11 -6
  25. package/dist/storage/implementation/MongoParameterCompactor.js +11 -8
  26. package/dist/storage/implementation/MongoParameterCompactor.js.map +1 -1
  27. package/dist/storage/implementation/MongoPersistedSyncRules.d.ts +14 -0
  28. package/dist/storage/implementation/MongoPersistedSyncRules.js +64 -0
  29. package/dist/storage/implementation/MongoPersistedSyncRules.js.map +1 -0
  30. package/dist/storage/implementation/MongoPersistedSyncRulesContent.d.ts +3 -0
  31. package/dist/storage/implementation/MongoPersistedSyncRulesContent.js +9 -0
  32. package/dist/storage/implementation/MongoPersistedSyncRulesContent.js.map +1 -1
  33. package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +47 -29
  34. package/dist/storage/implementation/MongoSyncBucketStorage.js +94 -387
  35. package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
  36. package/dist/storage/implementation/MongoSyncRulesLock.d.ts +5 -3
  37. package/dist/storage/implementation/MongoSyncRulesLock.js +12 -10
  38. package/dist/storage/implementation/MongoSyncRulesLock.js.map +1 -1
  39. package/dist/storage/implementation/MongoWriteCheckpointAPI.js +1 -1
  40. package/dist/storage/implementation/MongoWriteCheckpointAPI.js.map +1 -1
  41. package/dist/storage/implementation/OperationBatch.js +1 -1
  42. package/dist/storage/implementation/common/BucketDataDoc.d.ts +35 -0
  43. package/dist/storage/implementation/common/BucketDataDoc.js +2 -0
  44. package/dist/storage/implementation/common/BucketDataDoc.js.map +1 -0
  45. package/dist/storage/implementation/common/MongoSyncBucketStorageContext.d.ts +13 -0
  46. package/dist/storage/implementation/common/MongoSyncBucketStorageContext.js +2 -0
  47. package/dist/storage/implementation/common/MongoSyncBucketStorageContext.js.map +1 -0
  48. package/dist/storage/implementation/common/PersistedBatch.d.ts +108 -0
  49. package/dist/storage/implementation/common/PersistedBatch.js +237 -0
  50. package/dist/storage/implementation/common/PersistedBatch.js.map +1 -0
  51. package/dist/storage/implementation/common/SingleBucketStore.d.ts +54 -0
  52. package/dist/storage/implementation/common/SingleBucketStore.js +3 -0
  53. package/dist/storage/implementation/common/SingleBucketStore.js.map +1 -0
  54. package/dist/storage/implementation/common/SourceRecordStore.d.ts +36 -0
  55. package/dist/storage/implementation/common/SourceRecordStore.js +2 -0
  56. package/dist/storage/implementation/common/SourceRecordStore.js.map +1 -0
  57. package/dist/storage/implementation/common/VersionedPowerSyncMongoBase.d.ts +27 -0
  58. package/dist/storage/implementation/common/VersionedPowerSyncMongoBase.js +57 -0
  59. package/dist/storage/implementation/common/VersionedPowerSyncMongoBase.js.map +1 -0
  60. package/dist/storage/implementation/createMongoSyncBucketStorage.d.ts +7 -0
  61. package/dist/storage/implementation/createMongoSyncBucketStorage.js +9 -0
  62. package/dist/storage/implementation/createMongoSyncBucketStorage.js.map +1 -0
  63. package/dist/storage/implementation/db.d.ts +32 -35
  64. package/dist/storage/implementation/db.js +77 -99
  65. package/dist/storage/implementation/db.js.map +1 -1
  66. package/dist/storage/implementation/models.d.ts +62 -33
  67. package/dist/storage/implementation/models.js +20 -1
  68. package/dist/storage/implementation/models.js.map +1 -1
  69. package/dist/storage/implementation/v1/MongoBucketBatchV1.d.ts +13 -0
  70. package/dist/storage/implementation/v1/MongoBucketBatchV1.js +22 -0
  71. package/dist/storage/implementation/v1/MongoBucketBatchV1.js.map +1 -0
  72. package/dist/storage/implementation/v1/MongoChecksumsV1.d.ts +12 -0
  73. package/dist/storage/implementation/v1/MongoChecksumsV1.js +56 -0
  74. package/dist/storage/implementation/v1/MongoChecksumsV1.js.map +1 -0
  75. package/dist/storage/implementation/v1/MongoCompactorV1.d.ts +23 -0
  76. package/dist/storage/implementation/v1/MongoCompactorV1.js +52 -0
  77. package/dist/storage/implementation/v1/MongoCompactorV1.js.map +1 -0
  78. package/dist/storage/implementation/v1/MongoParameterCompactorV1.d.ts +9 -0
  79. package/dist/storage/implementation/v1/MongoParameterCompactorV1.js +20 -0
  80. package/dist/storage/implementation/v1/MongoParameterCompactorV1.js.map +1 -0
  81. package/dist/storage/implementation/v1/MongoSyncBucketStorageV1.d.ts +41 -0
  82. package/dist/storage/implementation/v1/MongoSyncBucketStorageV1.js +283 -0
  83. package/dist/storage/implementation/v1/MongoSyncBucketStorageV1.js.map +1 -0
  84. package/dist/storage/implementation/v1/PersistedBatchV1.d.ts +26 -0
  85. package/dist/storage/implementation/v1/PersistedBatchV1.js +183 -0
  86. package/dist/storage/implementation/v1/PersistedBatchV1.js.map +1 -0
  87. package/dist/storage/implementation/v1/SingleBucketStoreV1.d.ts +18 -0
  88. package/dist/storage/implementation/v1/SingleBucketStoreV1.js +57 -0
  89. package/dist/storage/implementation/v1/SingleBucketStoreV1.js.map +1 -0
  90. package/dist/storage/implementation/v1/SourceRecordStoreV1.d.ts +19 -0
  91. package/dist/storage/implementation/v1/SourceRecordStoreV1.js +105 -0
  92. package/dist/storage/implementation/v1/SourceRecordStoreV1.js.map +1 -0
  93. package/dist/storage/implementation/v1/VersionedPowerSyncMongoV1.d.ts +12 -0
  94. package/dist/storage/implementation/v1/VersionedPowerSyncMongoV1.js +20 -0
  95. package/dist/storage/implementation/v1/VersionedPowerSyncMongoV1.js.map +1 -0
  96. package/dist/storage/implementation/v1/models.d.ts +34 -0
  97. package/dist/storage/implementation/v1/models.js +37 -0
  98. package/dist/storage/implementation/v1/models.js.map +1 -0
  99. package/dist/storage/implementation/v3/MongoBucketBatchV3.d.ts +13 -0
  100. package/dist/storage/implementation/v3/MongoBucketBatchV3.js +34 -0
  101. package/dist/storage/implementation/v3/MongoBucketBatchV3.js.map +1 -0
  102. package/dist/storage/implementation/v3/MongoChecksumsV3.d.ts +15 -0
  103. package/dist/storage/implementation/v3/MongoChecksumsV3.js +84 -0
  104. package/dist/storage/implementation/v3/MongoChecksumsV3.js.map +1 -0
  105. package/dist/storage/implementation/v3/MongoCompactorV3.d.ts +23 -0
  106. package/dist/storage/implementation/v3/MongoCompactorV3.js +68 -0
  107. package/dist/storage/implementation/v3/MongoCompactorV3.js.map +1 -0
  108. package/dist/storage/implementation/v3/MongoParameterCompactorV3.d.ts +9 -0
  109. package/dist/storage/implementation/v3/MongoParameterCompactorV3.js +18 -0
  110. package/dist/storage/implementation/v3/MongoParameterCompactorV3.js.map +1 -0
  111. package/dist/storage/implementation/v3/MongoParameterLookupV3.d.ts +5 -0
  112. package/dist/storage/implementation/v3/MongoParameterLookupV3.js +9 -0
  113. package/dist/storage/implementation/v3/MongoParameterLookupV3.js.map +1 -0
  114. package/dist/storage/implementation/v3/MongoSyncBucketStorageV3.d.ts +41 -0
  115. package/dist/storage/implementation/v3/MongoSyncBucketStorageV3.js +407 -0
  116. package/dist/storage/implementation/v3/MongoSyncBucketStorageV3.js.map +1 -0
  117. package/dist/storage/implementation/v3/PersistedBatchV3.d.ts +29 -0
  118. package/dist/storage/implementation/v3/PersistedBatchV3.js +259 -0
  119. package/dist/storage/implementation/v3/PersistedBatchV3.js.map +1 -0
  120. package/dist/storage/implementation/v3/SingleBucketStoreV3.d.ts +18 -0
  121. package/dist/storage/implementation/v3/SingleBucketStoreV3.js +48 -0
  122. package/dist/storage/implementation/v3/SingleBucketStoreV3.js.map +1 -0
  123. package/dist/storage/implementation/v3/SourceRecordStoreV3.d.ts +22 -0
  124. package/dist/storage/implementation/v3/SourceRecordStoreV3.js +164 -0
  125. package/dist/storage/implementation/v3/SourceRecordStoreV3.js.map +1 -0
  126. package/dist/storage/implementation/v3/VersionedPowerSyncMongoV3.d.ts +21 -0
  127. package/dist/storage/implementation/v3/VersionedPowerSyncMongoV3.js +71 -0
  128. package/dist/storage/implementation/v3/VersionedPowerSyncMongoV3.js.map +1 -0
  129. package/dist/storage/implementation/v3/models.d.ts +43 -0
  130. package/dist/storage/implementation/v3/models.js +34 -0
  131. package/dist/storage/implementation/v3/models.js.map +1 -0
  132. package/dist/storage/storage-index.d.ts +6 -3
  133. package/dist/storage/storage-index.js +6 -3
  134. package/dist/storage/storage-index.js.map +1 -1
  135. package/dist/utils/util.d.ts +10 -3
  136. package/dist/utils/util.js +24 -3
  137. package/dist/utils/util.js.map +1 -1
  138. package/package.json +9 -9
  139. package/src/migrations/db/migrations/1688556755264-initial-sync-rules.ts +1 -1
  140. package/src/migrations/db/migrations/1702295701188-sync-rule-state.ts +6 -6
  141. package/src/storage/MongoBucketStorage.ts +92 -59
  142. package/src/storage/implementation/BucketDefinitionMapping.ts +72 -0
  143. package/src/storage/implementation/MongoBucketBatch.ts +110 -144
  144. package/src/storage/implementation/MongoBucketBatchShared.ts +11 -0
  145. package/src/storage/implementation/MongoChecksums.ts +52 -75
  146. package/src/storage/implementation/MongoCompactor.ts +374 -404
  147. package/src/storage/implementation/MongoParameterCompactor.ts +37 -24
  148. package/src/storage/implementation/MongoPersistedSyncRules.ts +76 -0
  149. package/src/storage/implementation/MongoPersistedSyncRulesContent.ts +17 -0
  150. package/src/storage/implementation/MongoSyncBucketStorage.ts +181 -455
  151. package/src/storage/implementation/MongoSyncRulesLock.ts +11 -13
  152. package/src/storage/implementation/MongoWriteCheckpointAPI.ts +3 -1
  153. package/src/storage/implementation/OperationBatch.ts +1 -1
  154. package/src/storage/implementation/common/BucketDataDoc.ts +37 -0
  155. package/src/storage/implementation/common/MongoSyncBucketStorageContext.ts +15 -0
  156. package/src/storage/implementation/common/PersistedBatch.ts +364 -0
  157. package/src/storage/implementation/common/SingleBucketStore.ts +63 -0
  158. package/src/storage/implementation/common/SourceRecordStore.ts +49 -0
  159. package/src/storage/implementation/common/VersionedPowerSyncMongoBase.ts +80 -0
  160. package/src/storage/implementation/createMongoSyncBucketStorage.ts +25 -0
  161. package/src/storage/implementation/db.ts +105 -129
  162. package/src/storage/implementation/models.ts +82 -36
  163. package/src/storage/implementation/v1/MongoBucketBatchV1.ts +32 -0
  164. package/src/storage/implementation/v1/MongoChecksumsV1.ts +75 -0
  165. package/src/storage/implementation/v1/MongoCompactorV1.ts +93 -0
  166. package/src/storage/implementation/v1/MongoParameterCompactorV1.ts +26 -0
  167. package/src/storage/implementation/v1/MongoSyncBucketStorageV1.ts +448 -0
  168. package/src/storage/implementation/v1/PersistedBatchV1.ts +230 -0
  169. package/src/storage/implementation/v1/SingleBucketStoreV1.ts +74 -0
  170. package/src/storage/implementation/v1/SourceRecordStoreV1.ts +156 -0
  171. package/src/storage/implementation/v1/VersionedPowerSyncMongoV1.ts +28 -0
  172. package/src/storage/implementation/v1/models.ts +84 -0
  173. package/src/storage/implementation/v3/MongoBucketBatchV3.ts +44 -0
  174. package/src/storage/implementation/v3/MongoChecksumsV3.ts +120 -0
  175. package/src/storage/implementation/v3/MongoCompactorV3.ts +107 -0
  176. package/src/storage/implementation/v3/MongoParameterCompactorV3.ts +24 -0
  177. package/src/storage/implementation/v3/MongoParameterLookupV3.ts +12 -0
  178. package/src/storage/implementation/v3/MongoSyncBucketStorageV3.ts +550 -0
  179. package/src/storage/implementation/v3/PersistedBatchV3.ts +318 -0
  180. package/src/storage/implementation/v3/SingleBucketStoreV3.ts +68 -0
  181. package/src/storage/implementation/v3/SourceRecordStoreV3.ts +226 -0
  182. package/src/storage/implementation/v3/VersionedPowerSyncMongoV3.ts +112 -0
  183. package/src/storage/implementation/v3/models.ts +96 -0
  184. package/src/storage/storage-index.ts +6 -3
  185. package/src/utils/util.ts +34 -5
  186. package/test/src/storage_compacting.test.ts +57 -29
  187. package/test/src/storage_sync.test.ts +351 -5
  188. package/test/tsconfig.json +0 -1
  189. package/tsconfig.tsbuildinfo +1 -1
  190. package/dist/storage/implementation/PersistedBatch.d.ts +0 -71
  191. package/dist/storage/implementation/PersistedBatch.js +0 -354
  192. package/dist/storage/implementation/PersistedBatch.js.map +0 -1
  193. package/src/storage/implementation/PersistedBatch.ts +0 -432
@@ -3,7 +3,7 @@ import { mongo } from '@powersync/lib-service-mongodb';
3
3
  import {
4
4
  BaseObserver,
5
5
  DO_NOT_LOG,
6
- logger,
6
+ Logger,
7
7
  ReplicationAbortedError,
8
8
  ServiceAssertionError
9
9
  } from '@powersync/lib-services-framework';
@@ -11,48 +11,43 @@ import {
11
11
  BroadcastIterable,
12
12
  CHECKPOINT_INVALIDATE_ALL,
13
13
  CheckpointChanges,
14
- deserializeParameterLookup,
15
14
  GetCheckpointChangesOptions,
16
15
  InternalOpId,
17
- internalToExternalOpId,
18
16
  maxLsn,
19
17
  mergeAsyncIterables,
20
18
  PopulateChecksumCacheOptions,
21
19
  PopulateChecksumCacheResults,
22
- ProtocolOpId,
23
20
  ReplicationCheckpoint,
24
21
  storage,
25
22
  utils,
26
23
  WatchWriteCheckpointOptions
27
24
  } from '@powersync/service-core';
28
- import { JSONBig } from '@powersync/service-jsonbig';
29
- import { HydratedSyncRules, ScopedParameterLookup, SqliteJsonRow } from '@powersync/service-sync-rules';
25
+ import { HydratedSyncRules, ParameterLookupRows, ScopedParameterLookup } from '@powersync/service-sync-rules';
30
26
  import * as bson from 'bson';
31
27
  import { LRUCache } from 'lru-cache';
32
28
  import * as timers from 'timers/promises';
33
- import { idPrefixFilter, mapOpEntry, readSingleBatch, setSessionSnapshotTime } from '../../utils/util.js';
29
+ import { retryOnMongoMaxTimeMSExpired } from '../../utils/util.js';
34
30
  import { MongoBucketStorage } from '../MongoBucketStorage.js';
35
- import { VersionedPowerSyncMongo } from './db.js';
36
- import {
37
- BucketDataDocument,
38
- BucketDataKey,
39
- BucketStateDocument,
40
- SourceKey,
41
- SourceTableDocument,
42
- StorageConfig
43
- } from './models.js';
44
- import { MongoBucketBatch } from './MongoBucketBatch.js';
31
+ import { MongoSyncBucketStorageContext } from './common/MongoSyncBucketStorageContext.js';
32
+ import type { VersionedPowerSyncMongo } from './db.js';
33
+ import { CommonSourceTableDocument, StorageConfig } from './models.js';
34
+ import { MongoBucketBatchOptions } from './MongoBucketBatch.js';
45
35
  import { MongoChecksumOptions, MongoChecksums } from './MongoChecksums.js';
46
- import { MongoCompactor } from './MongoCompactor.js';
36
+ import { MongoCompactOptions, MongoCompactor } from './MongoCompactor.js';
47
37
  import { MongoParameterCompactor } from './MongoParameterCompactor.js';
48
38
  import { MongoPersistedSyncRulesContent } from './MongoPersistedSyncRulesContent.js';
49
39
  import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js';
50
40
 
51
41
  export interface MongoSyncBucketStorageOptions {
52
- checksumOptions?: Omit<MongoChecksumOptions, 'storageConfig'>;
42
+ checksumOptions?: Omit<MongoChecksumOptions, 'storageConfig' | 'mapping'>;
53
43
  storageConfig: StorageConfig;
54
44
  }
55
45
 
46
+ interface InternalCheckpointChanges extends CheckpointChanges {
47
+ updatedWriteCheckpoints: Map<string, bigint>;
48
+ invalidateWriteCheckpoints: boolean;
49
+ }
50
+
56
51
  /**
57
52
  * Only keep checkpoints around for a minute, before fetching a fresh one.
58
53
  *
@@ -64,43 +59,68 @@ export interface MongoSyncBucketStorageOptions {
64
59
  */
65
60
  const CHECKPOINT_TIMEOUT_MS = 60_000;
66
61
 
67
- export class MongoSyncBucketStorage
62
+ export abstract class MongoSyncBucketStorage
68
63
  extends BaseObserver<storage.SyncRulesBucketStorageListener>
69
64
  implements storage.SyncRulesBucketStorage
70
65
  {
66
+ readonly db: VersionedPowerSyncMongo;
71
67
  [DO_NOT_LOG] = true;
72
68
 
73
- private readonly db: VersionedPowerSyncMongo;
74
69
  readonly checksums: MongoChecksums;
75
70
 
76
71
  private parsedSyncRulesCache: { parsed: HydratedSyncRules; options: storage.ParseSyncRulesOptions } | undefined;
77
72
  private writeCheckpointAPI: MongoWriteCheckpointAPI;
73
+ public readonly logger: Logger;
74
+ #storageInitialized = false;
78
75
 
79
76
  constructor(
80
77
  public readonly factory: MongoBucketStorage,
81
78
  public readonly group_id: number,
82
- private readonly sync_rules: MongoPersistedSyncRulesContent,
79
+ protected readonly sync_rules: MongoPersistedSyncRulesContent,
83
80
  public readonly slot_name: string,
84
81
  writeCheckpointMode: storage.WriteCheckpointMode | undefined,
85
82
  options: MongoSyncBucketStorageOptions
86
83
  ) {
87
84
  super();
88
85
  this.db = factory.db.versioned(sync_rules.getStorageConfig());
89
- this.checksums = new MongoChecksums(this.db, this.group_id, {
90
- ...options.checksumOptions,
91
- storageConfig: options?.storageConfig
92
- });
86
+ this.checksums = this.createMongoChecksums(options);
93
87
  this.writeCheckpointAPI = new MongoWriteCheckpointAPI({
94
88
  db: this.db,
95
89
  mode: writeCheckpointMode ?? storage.WriteCheckpointMode.MANAGED,
96
90
  sync_rules_id: group_id
97
91
  });
92
+ this.logger = sync_rules.logger;
98
93
  }
99
94
 
95
+ /**
96
+ * Not for external use - public here for tests only.
97
+ *
98
+ * @internal
99
+ */
100
+ abstract createMongoCompactor(options: MongoCompactOptions): MongoCompactor;
101
+
102
+ protected abstract createMongoChecksums(options: MongoSyncBucketStorageOptions): MongoChecksums;
103
+ protected abstract createMongoParameterCompactor(
104
+ checkpoint: InternalOpId,
105
+ options: storage.CompactOptions
106
+ ): MongoParameterCompactor;
107
+
100
108
  get writeCheckpointMode() {
101
109
  return this.writeCheckpointAPI.writeCheckpointMode;
102
110
  }
103
111
 
112
+ get mapping() {
113
+ return this.sync_rules.mapping;
114
+ }
115
+
116
+ protected get versionContext(): MongoSyncBucketStorageContext {
117
+ return {
118
+ db: this.db,
119
+ group_id: this.group_id,
120
+ mapping: this.mapping
121
+ };
122
+ }
123
+
104
124
  setWriteCheckpointMode(mode: storage.WriteCheckpointMode): void {
105
125
  this.writeCheckpointAPI.setWriteCheckpointMode(mode);
106
126
  }
@@ -118,10 +138,6 @@ export class MongoSyncBucketStorage
118
138
 
119
139
  getParsedSyncRules(options: storage.ParseSyncRulesOptions): HydratedSyncRules {
120
140
  const { parsed, options: cachedOptions } = this.parsedSyncRulesCache ?? {};
121
- /**
122
- * Check if the cached sync rules, if present, had the same options.
123
- * Parse sync rules if the options are different or if there is no cached value.
124
- */
125
141
  if (!parsed || options.defaultSchema != cachedOptions?.defaultSchema) {
126
142
  this.parsedSyncRulesCache = { parsed: this.sync_rules.parsed(options).hydratedSyncRules(), options };
127
143
  }
@@ -143,26 +159,15 @@ export class MongoSyncBucketStorage
143
159
  }
144
160
  );
145
161
  if (!doc?.snapshot_done || !['ACTIVE', 'ERRORED'].includes(doc.state)) {
146
- // Sync rules not active - return null
147
162
  return null;
148
163
  }
149
164
 
150
- // Specifically using operationTime instead of clusterTime
151
- // There are 3 fields in the response:
152
- // 1. operationTime, not exposed for snapshot sessions (used for causal consistency)
153
- // 2. clusterTime (used for connection management)
154
- // 3. atClusterTime, which is session.snapshotTime
155
- // We use atClusterTime, to match the driver's internal snapshot handling.
156
- // There are cases where clusterTime > operationTime and atClusterTime,
157
- // which could cause snapshot queries using this as the snapshotTime to timeout.
158
- // This was specifically observed on MongoDB 6.0 and 7.0.
159
165
  const snapshotTime = (session as any).snapshotTime as bson.Timestamp | undefined;
160
166
  if (snapshotTime == null) {
161
167
  throw new ServiceAssertionError('Missing snapshotTime in getCheckpoint()');
162
168
  }
163
169
  return new MongoReplicationCheckpoint(
164
170
  this,
165
- // null/0n is a valid checkpoint in some cases, for example if the initial snapshot was empty
166
171
  doc.last_checkpoint ?? 0n,
167
172
  doc.last_checkpoint_lsn ?? null,
168
173
  snapshotTime
@@ -170,7 +175,23 @@ export class MongoSyncBucketStorage
170
175
  });
171
176
  }
172
177
 
178
+ protected abstract initializeVersionStorage(): Promise<void>;
179
+
180
+ private async initializeStorage() {
181
+ if (this.#storageInitialized) {
182
+ return;
183
+ }
184
+
185
+ await this.db.initializeStreamStorage(this.group_id);
186
+ await this.initializeVersionStorage();
187
+ this.#storageInitialized = true;
188
+ }
189
+
190
+ protected abstract createWriterImpl(batchOptions: MongoBucketBatchOptions): storage.BucketStorageBatch;
191
+
173
192
  async createWriter(options: storage.CreateWriterOptions): Promise<storage.BucketStorageBatch> {
193
+ await this.initializeStorage();
194
+
174
195
  const doc = await this.db.sync_rules.findOne(
175
196
  {
176
197
  _id: this.group_id
@@ -179,10 +200,11 @@ export class MongoSyncBucketStorage
179
200
  );
180
201
  const checkpoint_lsn = doc?.last_checkpoint_lsn ?? null;
181
202
 
182
- const writer = new MongoBucketBatch({
183
- logger: options.logger,
203
+ const batchOptions: MongoBucketBatchOptions = {
204
+ logger: options.logger ?? this.logger,
184
205
  db: this.db,
185
206
  syncRules: this.sync_rules.parsed(options).hydratedSyncRules(),
207
+ mapping: this.sync_rules.mapping,
186
208
  groupId: this.group_id,
187
209
  slotName: this.slot_name,
188
210
  lastCheckpointLsn: checkpoint_lsn,
@@ -190,15 +212,14 @@ export class MongoSyncBucketStorage
190
212
  keepaliveOp: doc?.keepalive_op ? BigInt(doc.keepalive_op) : null,
191
213
  storeCurrentData: options.storeCurrentData,
192
214
  skipExistingRows: options.skipExistingRows ?? false,
193
- markRecordUnavailable: options.markRecordUnavailable
194
- });
215
+ markRecordUnavailable: options.markRecordUnavailable,
216
+ tracer: options.tracer
217
+ };
218
+ const writer = this.createWriterImpl(batchOptions);
195
219
  this.iterateListeners((cb) => cb.batchStarted?.(writer));
196
220
  return writer;
197
221
  }
198
222
 
199
- /**
200
- * @deprecated Use `createWriter()` with `await using` instead.
201
- */
202
223
  async startBatch(
203
224
  options: storage.CreateWriterOptions,
204
225
  callback: (batch: storage.BucketStorageBatch) => Promise<void>
@@ -209,6 +230,16 @@ export class MongoSyncBucketStorage
209
230
  return writer.last_flushed_op != null ? { flushed_op: writer.last_flushed_op } : null;
210
231
  }
211
232
 
233
+ protected abstract sourceTableBaseId(): Partial<CommonSourceTableDocument>;
234
+
235
+ protected abstract augmentCreatedSourceTableDocument(
236
+ createDoc: CommonSourceTableDocument,
237
+ options: storage.ResolveTableOptions,
238
+ candidateSourceTable: storage.SourceTable
239
+ ): void;
240
+
241
+ protected abstract initializeResolvedSourceRecords(sourceTableId: bson.ObjectId): Promise<void>;
242
+
212
243
  async resolveTable(options: storage.ResolveTableOptions): Promise<storage.ResolveTableResult> {
213
244
  const { group_id, connection_id, connection_tag, entity_descriptor } = options;
214
245
 
@@ -220,23 +251,36 @@ export class MongoSyncBucketStorage
220
251
  type_oid: column.typeId
221
252
  }));
222
253
  let result: storage.ResolveTableResult | null = null;
254
+ let initializeSourceRecordsFor: bson.ObjectId | null = null;
255
+
256
+ const baseId = this.sourceTableBaseId();
223
257
  await this.db.client.withSession(async (session) => {
224
- const col = this.db.source_tables;
225
- let filter: Partial<SourceTableDocument> = {
226
- group_id: group_id,
258
+ const col = this.db.commonSourceTables(group_id);
259
+ let filter: Partial<CommonSourceTableDocument> = {
260
+ ...baseId,
227
261
  connection_id: connection_id,
228
262
  schema_name: schema,
229
263
  table_name: name,
230
264
  replica_id_columns2: normalizedReplicaIdColumns
231
265
  };
266
+
232
267
  if (objectId != null) {
233
268
  filter.relation_id = objectId;
234
269
  }
235
270
  let doc = await col.findOne(filter, { session });
236
271
  if (doc == null) {
237
- doc = {
238
- _id: new bson.ObjectId(),
239
- group_id: group_id,
272
+ const candidateSourceTable = new storage.SourceTable({
273
+ id: new bson.ObjectId(),
274
+ connectionTag: connection_tag,
275
+ objectId: objectId,
276
+ schema: schema,
277
+ name: name,
278
+ replicaIdColumns: replicaIdColumns,
279
+ snapshotComplete: false
280
+ });
281
+ const createDoc: CommonSourceTableDocument = {
282
+ _id: candidateSourceTable.id as bson.ObjectId,
283
+ ...(baseId as any),
240
284
  connection_id: connection_id,
241
285
  relation_id: objectId,
242
286
  schema_name: schema,
@@ -246,8 +290,11 @@ export class MongoSyncBucketStorage
246
290
  snapshot_done: false,
247
291
  snapshot_status: undefined
248
292
  };
293
+ this.augmentCreatedSourceTableDocument(createDoc, options, candidateSourceTable);
294
+ doc = createDoc;
249
295
 
250
296
  await col.insertOne(doc, { session });
297
+ initializeSourceRecordsFor = doc._id;
251
298
  }
252
299
  const sourceTable = new storage.SourceTable({
253
300
  id: doc._id,
@@ -271,16 +318,14 @@ export class MongoSyncBucketStorage
271
318
  };
272
319
 
273
320
  let dropTables: storage.SourceTable[] = [];
274
- // Detect tables that are either renamed, or have different replica_id_columns
275
321
  let truncateFilter = [{ schema_name: schema, table_name: name }] as any[];
276
322
  if (objectId != null) {
277
- // Only detect renames if the source uses relation ids.
278
323
  truncateFilter.push({ relation_id: objectId });
279
324
  }
280
325
  const truncate = await col
281
326
  .find(
282
327
  {
283
- group_id: group_id,
328
+ ...baseId,
284
329
  connection_id: connection_id,
285
330
  _id: { $ne: doc._id },
286
331
  $or: truncateFilter
@@ -307,223 +352,38 @@ export class MongoSyncBucketStorage
307
352
  dropTables: dropTables
308
353
  };
309
354
  });
355
+ if (initializeSourceRecordsFor != null) {
356
+ await this.initializeResolvedSourceRecords(initializeSourceRecordsFor);
357
+ }
310
358
  return result!;
311
359
  }
312
360
 
361
+ protected abstract getParameterSetsImpl(
362
+ checkpoint: MongoReplicationCheckpoint,
363
+ lookups: ScopedParameterLookup[],
364
+ limit: number
365
+ ): Promise<ParameterLookupRows[]>;
366
+
313
367
  async getParameterSets(
314
368
  checkpoint: MongoReplicationCheckpoint,
315
- lookups: ScopedParameterLookup[]
316
- ): Promise<SqliteJsonRow[]> {
317
- return this.db.client.withSession({ snapshot: true }, async (session) => {
318
- // Set the session's snapshot time to the checkpoint's snapshot time.
319
- // An alternative would be to create the session when the checkpoint is created, but managing
320
- // the session lifetime would become more complex.
321
- // Starting and ending sessions are cheap (synchronous when no transactions are used),
322
- // so this should be fine.
323
- // This is a roundabout way of setting {readConcern: {atClusterTime: clusterTime}}, since
324
- // that is not exposed directly by the driver.
325
- // Future versions of the driver may change the snapshotTime behavior, so we need tests to
326
- // validate that this works as expected. We test this in the compacting tests.
327
- setSessionSnapshotTime(session, checkpoint.snapshotTime);
328
- const lookupFilter = lookups.map((lookup) => {
329
- return storage.serializeLookup(lookup);
330
- });
331
- // This query does not use indexes super efficiently, apart from the lookup filter.
332
- // From some experimentation I could do individual lookups more efficient using an index
333
- // on {'key.g': 1, lookup: 1, 'key.t': 1, 'key.k': 1, _id: -1},
334
- // but could not do the same using $group.
335
- // For now, just rely on compacting to remove extraneous data.
336
- // For a description of the data format, see the `/docs/parameters-lookups.md` file.
337
- const rows = await this.db.bucket_parameters
338
- .aggregate(
339
- [
340
- {
341
- $match: {
342
- 'key.g': this.group_id,
343
- lookup: { $in: lookupFilter },
344
- _id: { $lte: checkpoint.checkpoint }
345
- }
346
- },
347
- {
348
- $sort: {
349
- _id: -1
350
- }
351
- },
352
- {
353
- $group: {
354
- _id: { key: '$key', lookup: '$lookup' },
355
- bucket_parameters: {
356
- $first: '$bucket_parameters'
357
- }
358
- }
359
- }
360
- ],
361
- {
362
- session,
363
- readConcern: 'snapshot',
364
- // Limit the time for the operation to complete, to avoid getting connection timeouts
365
- maxTimeMS: lib_mongo.db.MONGO_OPERATION_TIMEOUT_MS
366
- }
367
- )
368
- .toArray()
369
- .catch((e) => {
370
- throw lib_mongo.mapQueryError(e, 'while evaluating parameter queries');
371
- });
372
- const groupedParameters = rows.map((row) => {
373
- return row.bucket_parameters;
374
- });
375
- return groupedParameters.flat();
376
- });
369
+ lookups: ScopedParameterLookup[],
370
+ limit: number
371
+ ): Promise<ParameterLookupRows[]> {
372
+ return this.getParameterSetsImpl(checkpoint, lookups, limit);
377
373
  }
378
374
 
375
+ protected abstract getBucketDataBatchImpl(
376
+ checkpoint: utils.InternalOpId,
377
+ dataBuckets: storage.BucketDataRequest[],
378
+ options?: storage.BucketDataBatchOptions
379
+ ): AsyncIterable<storage.SyncBucketDataChunk>;
380
+
379
381
  async *getBucketDataBatch(
380
382
  checkpoint: utils.InternalOpId,
381
383
  dataBuckets: storage.BucketDataRequest[],
382
384
  options?: storage.BucketDataBatchOptions
383
385
  ): AsyncIterable<storage.SyncBucketDataChunk> {
384
- if (dataBuckets.length == 0) {
385
- return;
386
- }
387
- let filters: mongo.Filter<BucketDataDocument>[] = [];
388
- const bucketMap = new Map(dataBuckets.map((request) => [request.bucket, request.start]));
389
-
390
- if (checkpoint == null) {
391
- throw new ServiceAssertionError('checkpoint is null');
392
- }
393
- const end = checkpoint;
394
- for (let { bucket: name, start } of dataBuckets) {
395
- filters.push({
396
- _id: {
397
- $gt: {
398
- g: this.group_id,
399
- b: name,
400
- o: start
401
- },
402
- $lte: {
403
- g: this.group_id,
404
- b: name,
405
- o: end as any
406
- }
407
- }
408
- });
409
- }
410
-
411
- // Internal naming:
412
- // We do a query for one "batch", which may consist of multiple "chunks".
413
- // Each chunk is limited to single bucket, and is limited in length and size.
414
- // There are also overall batch length and size limits.
415
-
416
- const batchLimit = options?.limit ?? storage.DEFAULT_DOCUMENT_BATCH_LIMIT;
417
- const chunkSizeLimitBytes = options?.chunkLimitBytes ?? storage.DEFAULT_DOCUMENT_CHUNK_LIMIT_BYTES;
418
-
419
- const cursor = this.db.bucket_data.find(
420
- {
421
- $or: filters
422
- },
423
- {
424
- session: undefined,
425
- sort: { _id: 1 },
426
- limit: batchLimit,
427
- // Increase batch size above the default 101, so that we can fill an entire batch in
428
- // one go.
429
- // batchSize is 1 more than limit to auto-close the cursor.
430
- // See https://github.com/mongodb/node-mongodb-native/pull/4580
431
- batchSize: batchLimit + 1,
432
- // Raw mode is returns an array of Buffer instead of parsed documents.
433
- // We use it so that:
434
- // 1. We can calculate the document size accurately without serializing again.
435
- // 2. We can delay parsing the results until it's needed.
436
- // We manually use bson.deserialize below
437
- raw: true,
438
-
439
- // Limit the time for the operation to complete, to avoid getting connection timeouts
440
- maxTimeMS: lib_mongo.db.MONGO_OPERATION_TIMEOUT_MS
441
- }
442
- ) as unknown as mongo.FindCursor<Buffer>;
443
-
444
- // We want to limit results to a single batch to avoid high memory usage.
445
- // This approach uses MongoDB's batch limits to limit the data here, which limits
446
- // to the lower of the batch count and size limits.
447
- // This is similar to using `singleBatch: true` in the find options, but allows
448
- // detecting "hasMore".
449
- let { data, hasMore: batchHasMore } = await readSingleBatch(cursor).catch((e) => {
450
- throw lib_mongo.mapQueryError(e, 'while reading bucket data');
451
- });
452
- if (data.length == batchLimit) {
453
- // Limit reached - could have more data, despite the cursor being drained.
454
- batchHasMore = true;
455
- }
456
-
457
- let chunkSizeBytes = 0;
458
- let currentChunk: utils.SyncBucketData | null = null;
459
- let targetOp: InternalOpId | null = null;
460
-
461
- // Ordered by _id, meaning buckets are grouped together
462
- for (let rawData of data) {
463
- const row = bson.deserialize(rawData, storage.BSON_DESERIALIZE_INTERNAL_OPTIONS) as BucketDataDocument;
464
- const bucket = row._id.b;
465
-
466
- if (currentChunk == null || currentChunk.bucket != bucket || chunkSizeBytes >= chunkSizeLimitBytes) {
467
- // We need to start a new chunk
468
- let start: ProtocolOpId | undefined = undefined;
469
- if (currentChunk != null) {
470
- // There is an existing chunk we need to yield
471
- if (currentChunk.bucket == bucket) {
472
- // Current and new chunk have the same bucket, so need has_more on the current one.
473
- // If currentChunk.bucket != bucket, then we reached the end of the previous bucket,
474
- // and has_more = false in that case.
475
- currentChunk.has_more = true;
476
- start = currentChunk.next_after;
477
- }
478
-
479
- const yieldChunk = currentChunk;
480
- currentChunk = null;
481
- chunkSizeBytes = 0;
482
- yield { chunkData: yieldChunk, targetOp: targetOp };
483
- targetOp = null;
484
- }
485
-
486
- if (start == null) {
487
- const startOpId = bucketMap.get(bucket);
488
- if (startOpId == null) {
489
- throw new ServiceAssertionError(`data for unexpected bucket: ${bucket}`);
490
- }
491
- start = internalToExternalOpId(startOpId);
492
- }
493
- currentChunk = {
494
- bucket,
495
- after: start,
496
- has_more: false,
497
- data: [],
498
- next_after: start
499
- };
500
- targetOp = null;
501
- }
502
-
503
- const entry = mapOpEntry(row);
504
-
505
- if (row.target_op != null) {
506
- // MOVE, CLEAR
507
- if (targetOp == null || row.target_op > targetOp) {
508
- targetOp = row.target_op;
509
- }
510
- }
511
-
512
- currentChunk.data.push(entry);
513
- currentChunk.next_after = entry.op_id;
514
-
515
- chunkSizeBytes += rawData.byteLength;
516
- }
517
-
518
- if (currentChunk != null) {
519
- const yieldChunk = currentChunk;
520
- currentChunk = null;
521
- // This is the final chunk in the batch.
522
- // There may be more data if and only if the batch we retrieved isn't complete.
523
- yieldChunk.has_more = batchHasMore;
524
- yield { chunkData: yieldChunk, targetOp: targetOp };
525
- targetOp = null;
526
- }
386
+ yield* this.getBucketDataBatchImpl(checkpoint, dataBuckets, options);
527
387
  }
528
388
 
529
389
  async getChecksums(
@@ -538,7 +398,6 @@ export class MongoSyncBucketStorage
538
398
  }
539
399
 
540
400
  async terminate(options?: storage.TerminateOptions) {
541
- // Default is to clear the storage except when explicitly requested not to.
542
401
  if (!options || options?.clearStorage) {
543
402
  await this.clear(options);
544
403
  }
@@ -572,7 +431,7 @@ export class MongoSyncBucketStorage
572
431
  }
573
432
  );
574
433
  if (doc == null) {
575
- throw new ServiceAssertionError('Cannot find sync rules status');
434
+ throw new ServiceAssertionError('Cannot find replication stream status');
576
435
  }
577
436
 
578
437
  return {
@@ -583,32 +442,22 @@ export class MongoSyncBucketStorage
583
442
  };
584
443
  }
585
444
 
445
+ protected abstract clearBucketData(signal?: AbortSignal): Promise<void>;
446
+
447
+ protected abstract clearParameterIndexes(signal?: AbortSignal): Promise<void>;
448
+
449
+ protected abstract clearSourceRecords(signal?: AbortSignal): Promise<void>;
450
+
451
+ protected abstract clearBucketState(signal?: AbortSignal): Promise<void>;
452
+
453
+ protected abstract clearSourceTables(signal?: AbortSignal): Promise<void>;
454
+
586
455
  async clear(options?: storage.ClearStorageOptions): Promise<void> {
587
- while (true) {
588
- if (options?.signal?.aborted) {
589
- throw new ReplicationAbortedError('Aborted clearing data', options.signal.reason);
590
- }
591
- try {
592
- await this.clearIteration();
456
+ const signal = options?.signal;
593
457
 
594
- logger.info(`${this.slot_name} Done clearing data`);
595
- return;
596
- } catch (e: unknown) {
597
- if (lib_mongo.isMongoServerError(e) && e.codeName == 'MaxTimeMSExpired') {
598
- logger.info(
599
- `${this.slot_name} Cleared batch of data in ${lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS}ms, continuing...`
600
- );
601
- await timers.setTimeout(lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS / 5);
602
- } else {
603
- throw e;
604
- }
605
- }
458
+ if (signal?.aborted) {
459
+ throw new ReplicationAbortedError('Aborted clearing data', signal.reason);
606
460
  }
607
- }
608
-
609
- private async clearIteration(): Promise<void> {
610
- // Individual operations here may time out with the maxTimeMS option.
611
- // It is expected to still make progress, and continue on the next try.
612
461
 
613
462
  await this.db.sync_rules.updateOne(
614
463
  {
@@ -628,39 +477,31 @@ export class MongoSyncBucketStorage
628
477
  },
629
478
  { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
630
479
  );
631
- await this.db.bucket_data.deleteMany(
632
- {
633
- _id: idPrefixFilter<BucketDataKey>({ g: this.group_id }, ['b', 'o'])
634
- },
635
- { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
636
- );
637
- await this.db.bucket_parameters.deleteMany(
638
- {
639
- 'key.g': this.group_id
640
- },
641
- { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
642
- );
643
-
644
- await this.db.common_current_data.deleteMany(
645
- {
646
- _id: idPrefixFilter<SourceKey>({ g: this.group_id }, ['t', 'k'])
647
- },
648
- { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
649
- );
650
-
651
- await this.db.bucket_state.deleteMany(
652
- {
653
- _id: idPrefixFilter<BucketStateDocument['_id']>({ g: this.group_id }, ['b'])
654
- },
655
- { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
656
- );
657
480
 
658
- await this.db.source_tables.deleteMany(
659
- {
660
- group_id: this.group_id
661
- },
662
- { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
663
- );
481
+ await this.clearBucketData(signal);
482
+ await this.clearParameterIndexes(signal);
483
+ await this.clearSourceRecords(signal);
484
+ await this.clearBucketState(signal);
485
+ await this.clearSourceTables(signal);
486
+
487
+ this.#storageInitialized = false;
488
+ }
489
+
490
+ protected async clearDeleteMany(
491
+ label: string,
492
+ operation: () => Promise<mongo.DeleteResult>,
493
+ signal?: AbortSignal
494
+ ): Promise<void> {
495
+ await retryOnMongoMaxTimeMSExpired(operation, {
496
+ signal,
497
+ abortMessage: 'Aborted clearing data',
498
+ retryDelayMs: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS / 5,
499
+ onRetry: () => {
500
+ this.logger.info(
501
+ `Cleared batch of ${label} in ${lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS}ms, continuing...`
502
+ );
503
+ }
504
+ });
664
505
  }
665
506
 
666
507
  async reportError(e: any): Promise<void> {
@@ -684,100 +525,68 @@ export class MongoSyncBucketStorage
684
525
  const checkpoint = await this.getCheckpointInternal();
685
526
  maxOpId = checkpoint?.checkpoint ?? undefined;
686
527
  }
687
- await new MongoCompactor(this, this.db, { ...options, maxOpId }).compact();
528
+ await this.createMongoCompactor({ ...options, maxOpId, logger: this.logger }).compact();
688
529
 
689
530
  if (maxOpId != null && options?.compactParameterData) {
690
- await new MongoParameterCompactor(this.db, this.group_id, maxOpId, options).compact();
531
+ await this.createMongoParameterCompactor(maxOpId, options).compact();
691
532
  }
692
533
  }
693
534
 
694
535
  async populatePersistentChecksumCache(options: PopulateChecksumCacheOptions): Promise<PopulateChecksumCacheResults> {
695
- logger.info(`Populating persistent checksum cache...`);
536
+ this.logger.info(`Populating persistent checksum cache...`);
696
537
  const start = Date.now();
697
- // We do a minimal compact here.
698
- // We can optimize this in the future.
699
- const compactor = new MongoCompactor(this, this.db, {
538
+ const compactor = this.createMongoCompactor({
700
539
  ...options,
701
- // Don't track updates for MOVE compacting
702
- memoryLimitMB: 0
540
+ memoryLimitMB: 0,
541
+ logger: this.logger
703
542
  });
704
543
 
705
544
  const result = await compactor.populateChecksums({
706
- // There are cases with millions of small buckets, in which case it can take very long to
707
- // populate the checksums, with minimal benefit. We skip the small buckets here.
708
545
  minBucketChanges: options.minBucketChanges ?? 10
709
546
  });
710
547
  const duration = Date.now() - start;
711
- logger.info(`Populated persistent checksum cache in ${(duration / 1000).toFixed(1)}s`);
548
+ this.logger.info(`Populated persistent checksum cache in ${(duration / 1000).toFixed(1)}s`);
712
549
  return result;
713
550
  }
714
551
 
715
- /**
716
- * Instance-wide watch on the latest available checkpoint (op_id + lsn).
717
- */
718
552
  private async *watchActiveCheckpoint(signal: AbortSignal): AsyncIterable<ReplicationCheckpoint> {
719
553
  if (signal.aborted) {
720
554
  return;
721
555
  }
722
556
 
723
- // If the stream is idle, we wait a max of a minute (CHECKPOINT_TIMEOUT_MS) before we get another checkpoint,
724
- // to avoid stale checkpoint snapshots. This is what checkpointTimeoutStream() is for.
725
- // Essentially, even if there are no actual checkpoint changes, we want a new snapshotTime every minute or so,
726
- // to ensure that any new clients connecting will get a valid snapshotTime.
727
557
  const stream = mergeAsyncIterables(
728
558
  [this.checkpointChangesStream(signal), this.checkpointTimeoutStream(signal)],
729
559
  signal
730
560
  );
731
561
 
732
- // We only watch changes to the active sync rules.
733
- // If it changes to inactive, we abort and restart with the new sync rules.
734
562
  for await (const _ of stream) {
735
563
  if (signal.aborted) {
736
- // Would likely have been caught by the signal on the timeout or the upstream stream, but we check here anyway
737
564
  break;
738
565
  }
739
566
 
740
567
  const op = await this.getCheckpointInternal();
741
568
  if (op == null) {
742
- // Sync rules have changed - abort and restart.
743
- // We do a soft close of the stream here - no error
744
569
  break;
745
570
  }
746
571
 
747
- // Previously, we only yielded when the checkpoint or lsn changed.
748
- // However, we always want to use the latest snapshotTime, so we skip that filtering here.
749
- // That filtering could be added in the per-user streams if needed, but in general the capped collection
750
- // should already only contain useful changes in most cases.
751
572
  yield op;
752
573
  }
753
574
  }
754
575
 
755
- // Nothing is done here until a subscriber starts to iterate
756
576
  private readonly sharedIter = new BroadcastIterable((signal) => {
757
577
  return this.watchActiveCheckpoint(signal);
758
578
  });
759
579
 
760
- /**
761
- * User-specific watch on the latest checkpoint and/or write checkpoint.
762
- */
763
580
  async *watchCheckpointChanges(options: WatchWriteCheckpointOptions): AsyncIterable<storage.StorageCheckpointUpdate> {
764
581
  let lastCheckpoint: ReplicationCheckpoint | null = null;
765
582
 
766
583
  const iter = this.sharedIter[Symbol.asyncIterator](options.signal);
767
584
 
768
585
  let writeCheckpoint: bigint | null = null;
769
- // true if we queried the initial write checkpoint, even if it doesn't exist
770
586
  let queriedInitialWriteCheckpoint = false;
771
587
 
772
588
  for await (const nextCheckpoint of iter) {
773
- // lsn changes are not important by itself.
774
- // What is important is:
775
- // 1. checkpoint (op_id) changes.
776
- // 2. write checkpoint changes for the specific user
777
-
778
589
  if (nextCheckpoint.lsn != null && !queriedInitialWriteCheckpoint) {
779
- // Lookup the first write checkpoint for the user when we can.
780
- // There will not actually be one in all cases.
781
590
  writeCheckpoint = await this.writeCheckpointAPI.lastWriteCheckpoint({
782
591
  sync_rules_id: this.group_id,
783
592
  user_id: options.user_id,
@@ -793,15 +602,11 @@ export class MongoSyncBucketStorage
793
602
  lastCheckpoint.checkpoint == nextCheckpoint.checkpoint &&
794
603
  lastCheckpoint.lsn == nextCheckpoint.lsn
795
604
  ) {
796
- // No change - wait for next one
797
- // In some cases, many LSNs may be produced in a short time.
798
- // Add a delay to throttle the loop a bit.
799
605
  await timers.setTimeout(20 + 10 * Math.random());
800
606
  continue;
801
607
  }
802
608
 
803
609
  if (lastCheckpoint == null) {
804
- // First message for this stream - "INVALIDATE_ALL" means it will lookup all data
805
610
  yield {
806
611
  base: nextCheckpoint,
807
612
  writeCheckpoint,
@@ -815,8 +620,6 @@ export class MongoSyncBucketStorage
815
620
 
816
621
  let updatedWriteCheckpoint = updates.updatedWriteCheckpoints.get(options.user_id) ?? null;
817
622
  if (updates.invalidateWriteCheckpoints) {
818
- // Invalidated means there were too many updates to track the individual ones,
819
- // so we switch to "polling" (querying directly in each stream).
820
623
  updatedWriteCheckpoint = await this.writeCheckpointAPI.lastWriteCheckpoint({
821
624
  sync_rules_id: this.group_id,
822
625
  user_id: options.user_id,
@@ -827,8 +630,6 @@ export class MongoSyncBucketStorage
827
630
  }
828
631
  if (updatedWriteCheckpoint != null && (writeCheckpoint == null || updatedWriteCheckpoint > writeCheckpoint)) {
829
632
  writeCheckpoint = updatedWriteCheckpoint;
830
- // If it happened that we haven't queried a write checkpoint at this point,
831
- // then we don't need to anymore, since we got an updated one.
832
633
  queriedInitialWriteCheckpoint = true;
833
634
  }
834
635
 
@@ -848,12 +649,6 @@ export class MongoSyncBucketStorage
848
649
  }
849
650
  }
850
651
 
851
- /**
852
- * This watches the checkpoint_events capped collection for new documents inserted,
853
- * and yields whenever one or more documents are inserted.
854
- *
855
- * The actual checkpoint must be queried on the sync_rules collection after this.
856
- */
857
652
  private async *checkpointChangesStream(signal: AbortSignal): AsyncGenerator<void> {
858
653
  if (signal.aborted) {
859
654
  return;
@@ -872,17 +667,13 @@ export class MongoSyncBucketStorage
872
667
  cursor.close().catch(() => {});
873
668
  });
874
669
 
875
- // Yield once on start, regardless of whether there are documents in the cursor.
876
- // This is to ensure that the first iteration of the generator yields immediately.
877
670
  yield;
878
671
 
879
672
  try {
880
673
  while (!signal.aborted) {
881
674
  const doc = await cursor.tryNext().catch((e) => {
882
675
  if (lib_mongo.isMongoServerError(e) && e.codeName === 'CappedPositionLost') {
883
- // Cursor position lost, potentially due to a high rate of notifications
884
676
  cursor = query();
885
- // Treat as an event found, before querying the new cursor again
886
677
  return {};
887
678
  } else {
888
679
  return Promise.reject(e);
@@ -891,8 +682,6 @@ export class MongoSyncBucketStorage
891
682
  if (cursor.closed) {
892
683
  return;
893
684
  }
894
- // Skip buffered documents, if any. We don't care about the contents,
895
- // we only want to know when new documents are inserted.
896
685
  cursor.readBufferedDocuments();
897
686
  if (doc != null) {
898
687
  yield;
@@ -914,7 +703,6 @@ export class MongoSyncBucketStorage
914
703
  await timers.setTimeout(CHECKPOINT_TIMEOUT_MS, undefined, { signal });
915
704
  } catch (e) {
916
705
  if (e.name == 'AbortError') {
917
- // This is how we typically abort this stream, when all listeners are done
918
706
  return;
919
707
  }
920
708
  throw e;
@@ -926,94 +714,37 @@ export class MongoSyncBucketStorage
926
714
  }
927
715
  }
928
716
 
717
+ protected abstract getDataBucketChangesImpl(
718
+ options: GetCheckpointChangesOptions
719
+ ): Promise<Pick<CheckpointChanges, 'updatedDataBuckets' | 'invalidateDataBuckets'>>;
720
+
929
721
  private async getDataBucketChanges(
930
722
  options: GetCheckpointChangesOptions
931
723
  ): Promise<Pick<CheckpointChanges, 'updatedDataBuckets' | 'invalidateDataBuckets'>> {
932
- const limit = 1000;
933
- const bucketStateUpdates = await this.db.bucket_state
934
- .find(
935
- {
936
- // We have an index on (_id.g, last_op).
937
- '_id.g': this.group_id,
938
- last_op: { $gt: options.lastCheckpoint.checkpoint }
939
- },
940
- {
941
- projection: {
942
- '_id.b': 1
943
- },
944
- limit: limit + 1,
945
- // batchSize is 1 more than limit to auto-close the cursor.
946
- // See https://github.com/mongodb/node-mongodb-native/pull/4580
947
- batchSize: limit + 2,
948
- singleBatch: true
949
- }
950
- )
951
- .toArray();
952
-
953
- const buckets = bucketStateUpdates.map((doc) => doc._id.b);
954
- const invalidateDataBuckets = buckets.length > limit;
955
-
956
- return {
957
- invalidateDataBuckets: invalidateDataBuckets,
958
- updatedDataBuckets: invalidateDataBuckets ? new Set<string>() : new Set(buckets)
959
- };
724
+ return this.getDataBucketChangesImpl(options);
960
725
  }
961
726
 
727
+ protected abstract getParameterBucketChangesImpl(
728
+ options: GetCheckpointChangesOptions
729
+ ): Promise<Pick<CheckpointChanges, 'updatedParameterLookups' | 'invalidateParameterBuckets'>>;
730
+
962
731
  private async getParameterBucketChanges(
963
732
  options: GetCheckpointChangesOptions
964
733
  ): Promise<Pick<CheckpointChanges, 'updatedParameterLookups' | 'invalidateParameterBuckets'>> {
965
- const limit = 1000;
966
- const parameterUpdates = await this.db.bucket_parameters
967
- .find(
968
- {
969
- _id: { $gt: options.lastCheckpoint.checkpoint, $lte: options.nextCheckpoint.checkpoint },
970
- 'key.g': this.group_id
971
- },
972
- {
973
- projection: {
974
- lookup: 1
975
- },
976
- limit: limit + 1,
977
- // batchSize is 1 more than limit to auto-close the cursor.
978
- // See https://github.com/mongodb/node-mongodb-native/pull/4580
979
- batchSize: limit + 2,
980
- singleBatch: true
981
- }
982
- )
983
- .toArray();
984
- const invalidateParameterUpdates = parameterUpdates.length > limit;
985
-
986
- return {
987
- invalidateParameterBuckets: invalidateParameterUpdates,
988
- updatedParameterLookups: invalidateParameterUpdates
989
- ? new Set<string>()
990
- : new Set<string>(parameterUpdates.map((p) => JSONBig.stringify(deserializeParameterLookup(p.lookup))))
991
- };
734
+ return this.getParameterBucketChangesImpl(options);
992
735
  }
993
736
 
994
- // If we processed all connections together for each checkpoint, we could do a single lookup for all connections.
995
- // In practice, specific connections may fall behind. So instead, we just cache the results of each specific lookup.
996
- // TODO (later):
997
- // We can optimize this by implementing it like ChecksumCache: We can use partial cache results to do
998
- // more efficient lookups in some cases.
999
737
  private checkpointChangesCache = new LRUCache<
1000
738
  string,
1001
739
  InternalCheckpointChanges,
1002
740
  { options: GetCheckpointChangesOptions }
1003
741
  >({
1004
- // Limit to 50 cache entries, or 10MB, whichever comes first.
1005
- // Some rough calculations:
1006
- // If we process 10 checkpoints per second, and a connection may be 2 seconds behind, we could have
1007
- // up to 20 relevant checkpoints. That gives us 20*20 = 400 potentially-relevant cache entries.
1008
- // That is a worst-case scenario, so we don't actually store that many. In real life, the cache keys
1009
- // would likely be clustered around a few values, rather than spread over all 400 potential values.
1010
742
  max: 50,
1011
743
  maxSize: 12 * 1024 * 1024,
1012
744
  sizeCalculation: (value: InternalCheckpointChanges) => {
1013
- // Estimate of memory usage
1014
745
  const paramSize = [...value.updatedParameterLookups].reduce<number>((a, b) => a + b.length, 0);
1015
746
  const bucketSize = [...value.updatedDataBuckets].reduce<number>((a, b) => a + b.length, 0);
1016
- const writeCheckpointSize = value.updatedWriteCheckpoints.size * 30; // estiamte for user_id + bigint
747
+ const writeCheckpointSize = value.updatedWriteCheckpoints.size * 30;
1017
748
  return 100 + paramSize + bucketSize + writeCheckpointSize;
1018
749
  },
1019
750
  fetchMethod: async (_key, _staleValue, options) => {
@@ -1040,11 +771,6 @@ export class MongoSyncBucketStorage
1040
771
  }
1041
772
  }
1042
773
 
1043
- interface InternalCheckpointChanges extends CheckpointChanges {
1044
- updatedWriteCheckpoints: Map<string, bigint>;
1045
- invalidateWriteCheckpoints: boolean;
1046
- }
1047
-
1048
774
  class MongoReplicationCheckpoint implements ReplicationCheckpoint {
1049
775
  #storage: MongoSyncBucketStorage;
1050
776
 
@@ -1057,8 +783,8 @@ class MongoReplicationCheckpoint implements ReplicationCheckpoint {
1057
783
  this.#storage = storage;
1058
784
  }
1059
785
 
1060
- async getParameterSets(lookups: ScopedParameterLookup[]): Promise<SqliteJsonRow[]> {
1061
- return this.#storage.getParameterSets(this, lookups);
786
+ async getParameterSets(lookups: ScopedParameterLookup[], limit: number): Promise<ParameterLookupRows[]> {
787
+ return this.#storage.getParameterSets(this, lookups, limit);
1062
788
  }
1063
789
  }
1064
790
 
@@ -1066,7 +792,7 @@ class EmptyReplicationCheckpoint implements ReplicationCheckpoint {
1066
792
  readonly checkpoint: InternalOpId = 0n;
1067
793
  readonly lsn: string | null = null;
1068
794
 
1069
- async getParameterSets(lookups: ScopedParameterLookup[]): Promise<SqliteJsonRow[]> {
795
+ async getParameterSets(_lookups: ScopedParameterLookup[]): Promise<ParameterLookupRows[]> {
1070
796
  return [];
1071
797
  }
1072
798
  }