@powersync/service-module-mongodb-storage 0.15.4 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/dist/migrations/db/migrations/1688556755264-initial-sync-rules.js +1 -1
- package/dist/migrations/db/migrations/1688556755264-initial-sync-rules.js.map +1 -1
- package/dist/migrations/db/migrations/1702295701188-sync-rule-state.js +2 -2
- package/dist/migrations/db/migrations/1702295701188-sync-rule-state.js.map +1 -1
- package/dist/storage/MongoBucketStorage.d.ts +2 -2
- package/dist/storage/MongoBucketStorage.js +47 -34
- package/dist/storage/MongoBucketStorage.js.map +1 -1
- package/dist/storage/implementation/BucketDefinitionMapping.d.ts +17 -0
- package/dist/storage/implementation/BucketDefinitionMapping.js +58 -0
- package/dist/storage/implementation/BucketDefinitionMapping.js.map +1 -0
- package/dist/storage/implementation/MongoBucketBatch.d.ts +16 -14
- package/dist/storage/implementation/MongoBucketBatch.js +80 -115
- package/dist/storage/implementation/MongoBucketBatch.js.map +1 -1
- package/dist/storage/implementation/MongoBucketBatchShared.d.ts +5 -0
- package/dist/storage/implementation/MongoBucketBatchShared.js +8 -0
- package/dist/storage/implementation/MongoBucketBatchShared.js.map +1 -0
- package/dist/storage/implementation/MongoChecksums.d.ts +28 -17
- package/dist/storage/implementation/MongoChecksums.js +13 -72
- package/dist/storage/implementation/MongoChecksums.js.map +1 -1
- package/dist/storage/implementation/MongoCompactor.d.ts +98 -58
- package/dist/storage/implementation/MongoCompactor.js +229 -296
- package/dist/storage/implementation/MongoCompactor.js.map +1 -1
- package/dist/storage/implementation/MongoParameterCompactor.d.ts +11 -6
- package/dist/storage/implementation/MongoParameterCompactor.js +11 -8
- package/dist/storage/implementation/MongoParameterCompactor.js.map +1 -1
- package/dist/storage/implementation/MongoPersistedSyncRules.d.ts +14 -0
- package/dist/storage/implementation/MongoPersistedSyncRules.js +64 -0
- package/dist/storage/implementation/MongoPersistedSyncRules.js.map +1 -0
- package/dist/storage/implementation/MongoPersistedSyncRulesContent.d.ts +3 -0
- package/dist/storage/implementation/MongoPersistedSyncRulesContent.js +9 -0
- package/dist/storage/implementation/MongoPersistedSyncRulesContent.js.map +1 -1
- package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +47 -29
- package/dist/storage/implementation/MongoSyncBucketStorage.js +94 -387
- package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
- package/dist/storage/implementation/MongoSyncRulesLock.d.ts +5 -3
- package/dist/storage/implementation/MongoSyncRulesLock.js +12 -10
- package/dist/storage/implementation/MongoSyncRulesLock.js.map +1 -1
- package/dist/storage/implementation/MongoWriteCheckpointAPI.js +1 -1
- package/dist/storage/implementation/MongoWriteCheckpointAPI.js.map +1 -1
- package/dist/storage/implementation/OperationBatch.js +1 -1
- package/dist/storage/implementation/common/BucketDataDoc.d.ts +35 -0
- package/dist/storage/implementation/common/BucketDataDoc.js +2 -0
- package/dist/storage/implementation/common/BucketDataDoc.js.map +1 -0
- package/dist/storage/implementation/common/MongoSyncBucketStorageContext.d.ts +13 -0
- package/dist/storage/implementation/common/MongoSyncBucketStorageContext.js +2 -0
- package/dist/storage/implementation/common/MongoSyncBucketStorageContext.js.map +1 -0
- package/dist/storage/implementation/common/PersistedBatch.d.ts +108 -0
- package/dist/storage/implementation/common/PersistedBatch.js +237 -0
- package/dist/storage/implementation/common/PersistedBatch.js.map +1 -0
- package/dist/storage/implementation/common/SingleBucketStore.d.ts +54 -0
- package/dist/storage/implementation/common/SingleBucketStore.js +3 -0
- package/dist/storage/implementation/common/SingleBucketStore.js.map +1 -0
- package/dist/storage/implementation/common/SourceRecordStore.d.ts +36 -0
- package/dist/storage/implementation/common/SourceRecordStore.js +2 -0
- package/dist/storage/implementation/common/SourceRecordStore.js.map +1 -0
- package/dist/storage/implementation/common/VersionedPowerSyncMongoBase.d.ts +27 -0
- package/dist/storage/implementation/common/VersionedPowerSyncMongoBase.js +57 -0
- package/dist/storage/implementation/common/VersionedPowerSyncMongoBase.js.map +1 -0
- package/dist/storage/implementation/createMongoSyncBucketStorage.d.ts +7 -0
- package/dist/storage/implementation/createMongoSyncBucketStorage.js +9 -0
- package/dist/storage/implementation/createMongoSyncBucketStorage.js.map +1 -0
- package/dist/storage/implementation/db.d.ts +32 -35
- package/dist/storage/implementation/db.js +77 -99
- package/dist/storage/implementation/db.js.map +1 -1
- package/dist/storage/implementation/models.d.ts +62 -33
- package/dist/storage/implementation/models.js +20 -1
- package/dist/storage/implementation/models.js.map +1 -1
- package/dist/storage/implementation/v1/MongoBucketBatchV1.d.ts +13 -0
- package/dist/storage/implementation/v1/MongoBucketBatchV1.js +22 -0
- package/dist/storage/implementation/v1/MongoBucketBatchV1.js.map +1 -0
- package/dist/storage/implementation/v1/MongoChecksumsV1.d.ts +12 -0
- package/dist/storage/implementation/v1/MongoChecksumsV1.js +56 -0
- package/dist/storage/implementation/v1/MongoChecksumsV1.js.map +1 -0
- package/dist/storage/implementation/v1/MongoCompactorV1.d.ts +23 -0
- package/dist/storage/implementation/v1/MongoCompactorV1.js +52 -0
- package/dist/storage/implementation/v1/MongoCompactorV1.js.map +1 -0
- package/dist/storage/implementation/v1/MongoParameterCompactorV1.d.ts +9 -0
- package/dist/storage/implementation/v1/MongoParameterCompactorV1.js +20 -0
- package/dist/storage/implementation/v1/MongoParameterCompactorV1.js.map +1 -0
- package/dist/storage/implementation/v1/MongoSyncBucketStorageV1.d.ts +41 -0
- package/dist/storage/implementation/v1/MongoSyncBucketStorageV1.js +283 -0
- package/dist/storage/implementation/v1/MongoSyncBucketStorageV1.js.map +1 -0
- package/dist/storage/implementation/v1/PersistedBatchV1.d.ts +26 -0
- package/dist/storage/implementation/v1/PersistedBatchV1.js +183 -0
- package/dist/storage/implementation/v1/PersistedBatchV1.js.map +1 -0
- package/dist/storage/implementation/v1/SingleBucketStoreV1.d.ts +18 -0
- package/dist/storage/implementation/v1/SingleBucketStoreV1.js +57 -0
- package/dist/storage/implementation/v1/SingleBucketStoreV1.js.map +1 -0
- package/dist/storage/implementation/v1/SourceRecordStoreV1.d.ts +19 -0
- package/dist/storage/implementation/v1/SourceRecordStoreV1.js +105 -0
- package/dist/storage/implementation/v1/SourceRecordStoreV1.js.map +1 -0
- package/dist/storage/implementation/v1/VersionedPowerSyncMongoV1.d.ts +12 -0
- package/dist/storage/implementation/v1/VersionedPowerSyncMongoV1.js +20 -0
- package/dist/storage/implementation/v1/VersionedPowerSyncMongoV1.js.map +1 -0
- package/dist/storage/implementation/v1/models.d.ts +34 -0
- package/dist/storage/implementation/v1/models.js +37 -0
- package/dist/storage/implementation/v1/models.js.map +1 -0
- package/dist/storage/implementation/v3/MongoBucketBatchV3.d.ts +13 -0
- package/dist/storage/implementation/v3/MongoBucketBatchV3.js +34 -0
- package/dist/storage/implementation/v3/MongoBucketBatchV3.js.map +1 -0
- package/dist/storage/implementation/v3/MongoChecksumsV3.d.ts +15 -0
- package/dist/storage/implementation/v3/MongoChecksumsV3.js +84 -0
- package/dist/storage/implementation/v3/MongoChecksumsV3.js.map +1 -0
- package/dist/storage/implementation/v3/MongoCompactorV3.d.ts +23 -0
- package/dist/storage/implementation/v3/MongoCompactorV3.js +68 -0
- package/dist/storage/implementation/v3/MongoCompactorV3.js.map +1 -0
- package/dist/storage/implementation/v3/MongoParameterCompactorV3.d.ts +9 -0
- package/dist/storage/implementation/v3/MongoParameterCompactorV3.js +18 -0
- package/dist/storage/implementation/v3/MongoParameterCompactorV3.js.map +1 -0
- package/dist/storage/implementation/v3/MongoParameterLookupV3.d.ts +5 -0
- package/dist/storage/implementation/v3/MongoParameterLookupV3.js +9 -0
- package/dist/storage/implementation/v3/MongoParameterLookupV3.js.map +1 -0
- package/dist/storage/implementation/v3/MongoSyncBucketStorageV3.d.ts +41 -0
- package/dist/storage/implementation/v3/MongoSyncBucketStorageV3.js +407 -0
- package/dist/storage/implementation/v3/MongoSyncBucketStorageV3.js.map +1 -0
- package/dist/storage/implementation/v3/PersistedBatchV3.d.ts +29 -0
- package/dist/storage/implementation/v3/PersistedBatchV3.js +259 -0
- package/dist/storage/implementation/v3/PersistedBatchV3.js.map +1 -0
- package/dist/storage/implementation/v3/SingleBucketStoreV3.d.ts +18 -0
- package/dist/storage/implementation/v3/SingleBucketStoreV3.js +48 -0
- package/dist/storage/implementation/v3/SingleBucketStoreV3.js.map +1 -0
- package/dist/storage/implementation/v3/SourceRecordStoreV3.d.ts +22 -0
- package/dist/storage/implementation/v3/SourceRecordStoreV3.js +164 -0
- package/dist/storage/implementation/v3/SourceRecordStoreV3.js.map +1 -0
- package/dist/storage/implementation/v3/VersionedPowerSyncMongoV3.d.ts +21 -0
- package/dist/storage/implementation/v3/VersionedPowerSyncMongoV3.js +71 -0
- package/dist/storage/implementation/v3/VersionedPowerSyncMongoV3.js.map +1 -0
- package/dist/storage/implementation/v3/models.d.ts +43 -0
- package/dist/storage/implementation/v3/models.js +34 -0
- package/dist/storage/implementation/v3/models.js.map +1 -0
- package/dist/storage/storage-index.d.ts +6 -3
- package/dist/storage/storage-index.js +6 -3
- package/dist/storage/storage-index.js.map +1 -1
- package/dist/utils/util.d.ts +10 -3
- package/dist/utils/util.js +24 -3
- package/dist/utils/util.js.map +1 -1
- package/package.json +9 -9
- package/src/migrations/db/migrations/1688556755264-initial-sync-rules.ts +1 -1
- package/src/migrations/db/migrations/1702295701188-sync-rule-state.ts +6 -6
- package/src/storage/MongoBucketStorage.ts +92 -59
- package/src/storage/implementation/BucketDefinitionMapping.ts +72 -0
- package/src/storage/implementation/MongoBucketBatch.ts +110 -144
- package/src/storage/implementation/MongoBucketBatchShared.ts +11 -0
- package/src/storage/implementation/MongoChecksums.ts +52 -75
- package/src/storage/implementation/MongoCompactor.ts +374 -404
- package/src/storage/implementation/MongoParameterCompactor.ts +37 -24
- package/src/storage/implementation/MongoPersistedSyncRules.ts +76 -0
- package/src/storage/implementation/MongoPersistedSyncRulesContent.ts +17 -0
- package/src/storage/implementation/MongoSyncBucketStorage.ts +181 -455
- package/src/storage/implementation/MongoSyncRulesLock.ts +11 -13
- package/src/storage/implementation/MongoWriteCheckpointAPI.ts +3 -1
- package/src/storage/implementation/OperationBatch.ts +1 -1
- package/src/storage/implementation/common/BucketDataDoc.ts +37 -0
- package/src/storage/implementation/common/MongoSyncBucketStorageContext.ts +15 -0
- package/src/storage/implementation/common/PersistedBatch.ts +364 -0
- package/src/storage/implementation/common/SingleBucketStore.ts +63 -0
- package/src/storage/implementation/common/SourceRecordStore.ts +49 -0
- package/src/storage/implementation/common/VersionedPowerSyncMongoBase.ts +80 -0
- package/src/storage/implementation/createMongoSyncBucketStorage.ts +25 -0
- package/src/storage/implementation/db.ts +105 -129
- package/src/storage/implementation/models.ts +82 -36
- package/src/storage/implementation/v1/MongoBucketBatchV1.ts +32 -0
- package/src/storage/implementation/v1/MongoChecksumsV1.ts +75 -0
- package/src/storage/implementation/v1/MongoCompactorV1.ts +93 -0
- package/src/storage/implementation/v1/MongoParameterCompactorV1.ts +26 -0
- package/src/storage/implementation/v1/MongoSyncBucketStorageV1.ts +448 -0
- package/src/storage/implementation/v1/PersistedBatchV1.ts +230 -0
- package/src/storage/implementation/v1/SingleBucketStoreV1.ts +74 -0
- package/src/storage/implementation/v1/SourceRecordStoreV1.ts +156 -0
- package/src/storage/implementation/v1/VersionedPowerSyncMongoV1.ts +28 -0
- package/src/storage/implementation/v1/models.ts +84 -0
- package/src/storage/implementation/v3/MongoBucketBatchV3.ts +44 -0
- package/src/storage/implementation/v3/MongoChecksumsV3.ts +120 -0
- package/src/storage/implementation/v3/MongoCompactorV3.ts +107 -0
- package/src/storage/implementation/v3/MongoParameterCompactorV3.ts +24 -0
- package/src/storage/implementation/v3/MongoParameterLookupV3.ts +12 -0
- package/src/storage/implementation/v3/MongoSyncBucketStorageV3.ts +550 -0
- package/src/storage/implementation/v3/PersistedBatchV3.ts +318 -0
- package/src/storage/implementation/v3/SingleBucketStoreV3.ts +68 -0
- package/src/storage/implementation/v3/SourceRecordStoreV3.ts +226 -0
- package/src/storage/implementation/v3/VersionedPowerSyncMongoV3.ts +112 -0
- package/src/storage/implementation/v3/models.ts +96 -0
- package/src/storage/storage-index.ts +6 -3
- package/src/utils/util.ts +34 -5
- package/test/src/storage_compacting.test.ts +57 -29
- package/test/src/storage_sync.test.ts +351 -5
- package/test/tsconfig.json +0 -1
- package/tsconfig.tsbuildinfo +1 -1
- package/dist/storage/implementation/PersistedBatch.d.ts +0 -71
- package/dist/storage/implementation/PersistedBatch.js +0 -354
- package/dist/storage/implementation/PersistedBatch.js.map +0 -1
- package/src/storage/implementation/PersistedBatch.ts +0 -432
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
import { isMongoServerError, mongo, MONGO_OPERATION_TIMEOUT_MS } from '@powersync/lib-service-mongodb';
|
|
2
|
-
import {
|
|
2
|
+
import {
|
|
3
|
+
logger as defaultLogger,
|
|
4
|
+
Logger,
|
|
5
|
+
ReplicationAssertionError,
|
|
6
|
+
ServiceAssertionError
|
|
7
|
+
} from '@powersync/lib-services-framework';
|
|
3
8
|
import {
|
|
4
9
|
addChecksums,
|
|
5
10
|
InternalOpId,
|
|
@@ -9,15 +14,18 @@ import {
|
|
|
9
14
|
utils
|
|
10
15
|
} from '@powersync/service-core';
|
|
11
16
|
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
17
|
+
import { BucketDefinitionId } from './BucketDefinitionMapping.js';
|
|
18
|
+
import { BucketDataDoc, BucketKey } from './common/BucketDataDoc.js';
|
|
19
|
+
import { BucketDataDocumentGeneric, SingleBucketStore } from './common/SingleBucketStore.js';
|
|
20
|
+
import type { VersionedPowerSyncMongo } from './db.js';
|
|
21
|
+
import { BucketStateDocumentBase } from './models.js';
|
|
22
|
+
import type { MongoSyncBucketStorage } from './MongoSyncBucketStorage.js';
|
|
15
23
|
import { cacheKey } from './OperationBatch.js';
|
|
16
24
|
|
|
17
25
|
interface CurrentBucketState {
|
|
18
26
|
/** Bucket name */
|
|
19
27
|
bucket: string;
|
|
20
|
-
|
|
28
|
+
definitionId: BucketDefinitionId;
|
|
21
29
|
/**
|
|
22
30
|
* Rows seen in the bucket, with the last op_id of each.
|
|
23
31
|
*/
|
|
@@ -26,36 +34,30 @@ interface CurrentBucketState {
|
|
|
26
34
|
* Estimated memory usage of the seen Map.
|
|
27
35
|
*/
|
|
28
36
|
trackingSize: number;
|
|
29
|
-
|
|
30
37
|
/**
|
|
31
38
|
* Last (lowest) seen op_id that is not a PUT.
|
|
32
39
|
*/
|
|
33
40
|
lastNotPut: InternalOpId | null;
|
|
34
|
-
|
|
35
41
|
/**
|
|
36
42
|
* Number of REMOVE/MOVE operations seen since lastNotPut.
|
|
37
43
|
*/
|
|
38
44
|
opsSincePut: number;
|
|
39
|
-
|
|
40
45
|
/**
|
|
41
|
-
* Incrementally-updated checksum, up to maxOpId
|
|
46
|
+
* Incrementally-updated checksum, up to maxOpId.
|
|
42
47
|
*/
|
|
43
48
|
checksum: number;
|
|
44
|
-
|
|
45
49
|
/**
|
|
46
|
-
*
|
|
50
|
+
* Op count for the checksum.
|
|
47
51
|
*/
|
|
48
52
|
opCount: number;
|
|
49
|
-
|
|
50
53
|
/**
|
|
51
54
|
* Byte size of ops covered by the checksum.
|
|
52
55
|
*/
|
|
53
56
|
opBytes: number;
|
|
54
57
|
}
|
|
55
58
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
*/
|
|
59
|
+
type CompactClearProperties = 'op' | 'checksum' | 'target_op';
|
|
60
|
+
|
|
59
61
|
export interface MongoCompactOptions extends storage.CompactOptions {}
|
|
60
62
|
|
|
61
63
|
const DEFAULT_CLEAR_BATCH_LIMIT = 5000;
|
|
@@ -64,28 +66,36 @@ const DEFAULT_MOVE_BATCH_QUERY_LIMIT = 10_000;
|
|
|
64
66
|
const DEFAULT_MIN_BUCKET_CHANGES = 10;
|
|
65
67
|
const DEFAULT_MIN_CHANGE_RATIO = 0.1;
|
|
66
68
|
const DIRTY_BUCKET_SCAN_BATCH_SIZE = 2_000;
|
|
67
|
-
|
|
68
69
|
/** This default is primarily for tests. */
|
|
69
70
|
const DEFAULT_MEMORY_LIMIT_MB = 64;
|
|
70
71
|
|
|
71
|
-
export
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
72
|
+
/**
 * Summary of a bucket that has pending changes, as reported by the dirty-bucket scans
 * (dirtyBucketBatches / dirtyBucketBatchForChecksums).
 */
export interface DirtyBucket {
  /** Bucket name. */
  bucket: string;
  /**
   * Definition id resolved from the bucket state document by the scan's getDefinitionId callback.
   * NOTE(review): presumably null for storage versions that do not track definition ids - confirm.
   */
  definitionId: BucketDefinitionId | null;
  /** Estimated total op count: the compacted count plus the count since the last compact. */
  estimatedCount: number;
  /**
   * Estimated fraction of the bucket changed since the last compact.
   * Set by dirtyBucketBatchesForCollection; not set by dirtyBucketBatchForChecksumsForCollection.
   */
  dirtyRatio?: number;
}
|
|
78
|
+
|
|
79
|
+
export abstract class MongoCompactor {
|
|
80
|
+
protected updates: mongo.AnyBulkWriteOperation<BucketDataDocumentGeneric>[] = [];
|
|
81
|
+
protected bucketStateUpdates: mongo.AnyBulkWriteOperation<BucketStateDocumentBase>[] = [];
|
|
82
|
+
|
|
83
|
+
protected readonly idLimitBytes: number;
|
|
84
|
+
protected readonly moveBatchLimit: number;
|
|
85
|
+
protected readonly moveBatchQueryLimit: number;
|
|
86
|
+
protected readonly clearBatchLimit: number;
|
|
87
|
+
protected readonly minBucketChanges: number;
|
|
88
|
+
protected readonly minChangeRatio: number;
|
|
89
|
+
protected readonly maxOpId: bigint;
|
|
90
|
+
protected readonly buckets: string[] | undefined;
|
|
91
|
+
protected readonly signal?: AbortSignal;
|
|
92
|
+
protected readonly group_id: number;
|
|
93
|
+
|
|
94
|
+
protected readonly logger: Logger;
|
|
85
95
|
|
|
86
96
|
constructor(
|
|
87
|
-
|
|
88
|
-
|
|
97
|
+
protected readonly storage: MongoSyncBucketStorage,
|
|
98
|
+
protected readonly db: VersionedPowerSyncMongo,
|
|
89
99
|
options: MongoCompactOptions
|
|
90
100
|
) {
|
|
91
101
|
this.group_id = storage.group_id;
|
|
@@ -98,6 +108,7 @@ export class MongoCompactor {
|
|
|
98
108
|
this.maxOpId = options.maxOpId ?? 0n;
|
|
99
109
|
this.buckets = options.compactBuckets;
|
|
100
110
|
this.signal = options.signal;
|
|
111
|
+
this.logger = options.logger ?? defaultLogger;
|
|
101
112
|
}
|
|
102
113
|
|
|
103
114
|
/**
|
|
@@ -107,9 +118,8 @@ export class MongoCompactor {
|
|
|
107
118
|
*/
|
|
108
119
|
async compact() {
|
|
109
120
|
if (this.buckets) {
|
|
110
|
-
for (
|
|
111
|
-
// We can make this more efficient later on by iterating
|
|
112
|
-
// through the buckets in a single query.
|
|
121
|
+
for (const bucket of this.buckets) {
|
|
122
|
+
// We can make this more efficient later on by iterating through the buckets in a single query.
|
|
113
123
|
// That makes batching more tricky, so we leave for later.
|
|
114
124
|
await this.compactSingleBucketRetried(bucket);
|
|
115
125
|
}
|
|
@@ -118,8 +128,161 @@ export class MongoCompactor {
|
|
|
118
128
|
}
|
|
119
129
|
}
|
|
120
130
|
|
|
121
|
-
|
|
122
|
-
|
|
131
|
+
/**
 * Subset of compact, only populating checksums where relevant.
 *
 * Repeatedly fetches a batch of dirty buckets via dirtyBucketBatchForChecksums() and passes a
 * size-capped prefix of that batch to updateChecksumsBatch(), until no dirty buckets are returned.
 * NOTE(review): termination assumes updateChecksumsBatch() causes processed buckets to no longer
 * be returned by dirtyBucketBatchForChecksums() - confirm against the implementations.
 *
 * @param options.minBucketChanges forwarded unchanged to dirtyBucketBatchForChecksums().
 * @returns the total number of buckets passed to updateChecksumsBatch().
 * @throws whatever this.signal.throwIfAborted() throws when the signal has been aborted;
 *   checked before and after each batch query.
 */
async populateChecksums(options: { minBucketChanges: number }): Promise<PopulateChecksumCacheResults> {
  // Stop adding buckets to a single checksum batch once the summed estimate exceeds this.
  const maxEstimatedCountPerBatch = 50_000;

  let count = 0;
  while (true) {
    this.signal?.throwIfAborted();
    const buckets = await this.dirtyBucketBatchForChecksums(options);
    if (buckets.length === 0) {
      break;
    }
    this.signal?.throwIfAborted();

    const start = Date.now();
    // Filter batch by estimated bucket size, to reduce possibility of timeouts.
    // The bucket that crosses the cap is still included, so every iteration processes at least one bucket.
    const checkBuckets: typeof buckets = [];
    let totalCountEstimate = 0;
    for (const bucket of buckets) {
      checkBuckets.push(bucket);
      totalCountEstimate += bucket.estimatedCount;
      if (totalCountEstimate > maxEstimatedCountPerBatch) {
        break;
      }
    }
    // Report the size of the batch actually processed (checkBuckets), which is what
    // totalCountEstimate describes - not the size of the larger fetched batch.
    this.logger.info(
      `Calculating checksums for batch of ${checkBuckets.length} buckets, estimated count of ${totalCountEstimate}`
    );
    await this.updateChecksumsBatch(checkBuckets);
    this.logger.info(`Updated checksums for batch of ${checkBuckets.length} buckets in ${Date.now() - start}ms`);
    count += checkBuckets.length;
  }
  return { buckets: count };
}
|
|
164
|
+
|
|
165
|
+
/**
 * Walks a bucket state collection in ascending _id order, strictly between lastId and maxId,
 * and yields one array of dirty buckets per scan window.
 *
 * Each query scans at most DIRTY_BUCKET_SCAN_BATCH_SIZE documents, so a yielded array may be
 * empty when none of the scanned buckets qualify. Iteration ends when a scan returns no documents.
 *
 * @param collection bucket state collection to scan.
 * @param lastId exclusive lower bound for the first scan.
 * @param maxId exclusive upper bound for every scan.
 * @param options.minBucketChanges minimum estimate_since_compact.count for a bucket to be considered,
 *   also applied to the bucket's total estimated count.
 * @param options.minChangeRatio minimum dirty ratio (the larger of the count ratio and the byte ratio).
 * @param getDefinitionId resolves the definition id for a bucket state document; called once for
 *   every bucket returned by the query, including those later filtered out.
 */
protected async *dirtyBucketBatchesForCollection<TCollectionBucketState extends BucketStateDocumentBase>(
  collection: mongo.Collection<TCollectionBucketState>,
  lastId: TCollectionBucketState['_id'],
  maxId: TCollectionBucketState['_id'],
  options: {
    minBucketChanges: number;
    minChangeRatio: number;
  },
  getDefinitionId: (state: TCollectionBucketState) => BucketDefinitionId | null
): AsyncGenerator<DirtyBucket[]> {
  // Exclusive lower bound of the next scan window; advanced after every query.
  let scanAfter = lastId;

  for (;;) {
    // To avoid timeouts from too many buckets not meeting the minBucketChanges criteria, scan a fixed
    // window of buckets per query, and let the $facet stage return only the buckets that qualify.
    const pipeline = [
      { $match: { _id: { $gt: scanAfter, $lt: maxId } } },
      { $sort: { _id: 1 } },
      // Fixed window size, so that sparse matches do not block progress.
      { $limit: DIRTY_BUCKET_SCAN_BATCH_SIZE },
      {
        $facet: {
          buckets: [
            { $match: { 'estimate_since_compact.count': { $gte: options.minBucketChanges } } },
            { $project: { _id: 1, estimate_since_compact: 1, compacted_state: 1 } }
          ],
          // Highest _id in the scanned window - the starting point for the next query.
          cursor: [{ $sort: { _id: -1 } }, { $limit: 1 }, { $project: { _id: 1 } }]
        }
      }
    ];

    const pages = await collection
      .aggregate<{
        buckets: TCollectionBucketState[];
        cursor: Pick<TCollectionBucketState, '_id'>[];
      }>(pipeline, { maxTimeMS: MONGO_OPERATION_TIMEOUT_MS })
      .toArray();
    const page = pages[0];

    const lastScanned = page?.cursor?.[0];
    if (lastScanned == null) {
      // Nothing left to scan in the range.
      return;
    }
    scanAfter = lastScanned._id;

    const dirty: DirtyBucket[] = [];
    for (const state of page?.buckets ?? []) {
      // The numbers, specifically the bytes, could be a bigint. Convert to Number to allow calculating
      // ratios. BigInt precision is not needed here since this is only an estimate.
      const changedOps = state.estimate_since_compact?.count ?? 0;
      const allOps = (state.compacted_state?.count ?? 0) + changedOps;
      const changedBytes = Number(state.estimate_since_compact?.bytes ?? 0);
      const allBytes = Number(state.compacted_state?.bytes ?? 0) + changedBytes;

      const opRatio = allOps > 0 ? changedOps / allOps : 0;
      const byteRatio = allBytes > 0 ? changedBytes / allBytes : 0;

      const bucket = state._id.b;
      const definitionId = getDefinitionId(state);
      const dirtyRatio = Math.max(opRatio, byteRatio);

      if (allOps >= options.minBucketChanges && dirtyRatio >= options.minChangeRatio) {
        dirty.push({ bucket, definitionId, estimatedCount: allOps, dirtyRatio });
      }
    }

    yield dirty;
  }
}
|
|
249
|
+
|
|
250
|
+
/**
 * Fetches a single batch of up to 200 bucket state documents matching `filter`, ordered by
 * estimate_since_compact.count descending, and maps them to DirtyBucket summaries.
 *
 * @param collection bucket state collection to query.
 * @param filter selects the candidate bucket state documents. Every matched document must have
 *   estimate_since_compact set, since it is read without a null check.
 * @param getDefinitionId resolves the definition id for each returned document.
 * @returns one entry per returned document; dirtyRatio is not populated.
 */
protected async dirtyBucketBatchForChecksumsForCollection<TBucketState extends BucketStateDocumentBase>(
  collection: mongo.Collection<TBucketState>,
  filter: mongo.Filter<TBucketState>,
  getDefinitionId: (state: mongo.WithId<TBucketState>) => BucketDefinitionId | null
): Promise<DirtyBucket[]> {
  const stateCursor = collection.find(filter, {
    projection: { _id: 1, estimate_since_compact: 1, compacted_state: 1 },
    // Buckets with the most changes since the last compact come first.
    sort: { 'estimate_since_compact.count': -1 },
    limit: 200,
    maxTimeMS: MONGO_OPERATION_TIMEOUT_MS
  });
  const states = await stateCursor.toArray();

  const summaries: DirtyBucket[] = [];
  for (const state of states) {
    const bucket = state._id.b;
    const definitionId = getDefinitionId(state);
    // Counts are passed through Number() before being summed.
    const sinceCompact = Number(state.estimate_since_compact!.count);
    const compacted = Number(state.compacted_state?.count ?? 0);
    summaries.push({ bucket, definitionId, estimatedCount: sinceCompact + compacted });
  }
  return summaries;
}
|
|
276
|
+
|
|
277
|
+
public abstract dirtyBucketBatches(options: {
|
|
278
|
+
minBucketChanges: number;
|
|
279
|
+
minChangeRatio: number;
|
|
280
|
+
}): AsyncGenerator<DirtyBucket[]>;
|
|
281
|
+
|
|
282
|
+
public abstract dirtyBucketBatchForChecksums(options: { minBucketChanges: number }): Promise<DirtyBucket[]>;
|
|
283
|
+
|
|
284
|
+
protected async compactDirtyBuckets() {
|
|
285
|
+
for await (const buckets of this.dirtyBucketBatches({
|
|
123
286
|
minBucketChanges: this.minBucketChanges,
|
|
124
287
|
minChangeRatio: this.minChangeRatio
|
|
125
288
|
})) {
|
|
@@ -128,8 +291,8 @@ export class MongoCompactor {
|
|
|
128
291
|
continue;
|
|
129
292
|
}
|
|
130
293
|
|
|
131
|
-
for (
|
|
132
|
-
await this.compactSingleBucketRetried(bucket);
|
|
294
|
+
for (const { bucket, definitionId } of buckets) {
|
|
295
|
+
await this.compactSingleBucketRetried(bucket, definitionId);
|
|
133
296
|
}
|
|
134
297
|
}
|
|
135
298
|
}
|
|
@@ -139,15 +302,15 @@ export class MongoCompactor {
|
|
|
139
302
|
*
|
|
140
303
|
* This covers against occasional network or other database errors during a long compact job.
|
|
141
304
|
*/
|
|
142
|
-
|
|
305
|
+
protected async compactSingleBucketRetried(bucket: string, definitionId: BucketDefinitionId | null = null) {
|
|
143
306
|
let retryCount = 0;
|
|
144
307
|
while (true) {
|
|
145
308
|
try {
|
|
146
|
-
await this.compactSingleBucket(bucket);
|
|
309
|
+
await this.compactSingleBucket(bucket, definitionId);
|
|
147
310
|
break;
|
|
148
311
|
} catch (e) {
|
|
149
312
|
if (retryCount < 3 && isMongoServerError(e)) {
|
|
150
|
-
logger.warn(`Error compacting bucket ${bucket}, retrying...`, e);
|
|
313
|
+
this.logger.warn(`Error compacting bucket ${bucket}, retrying...`, e);
|
|
151
314
|
retryCount++;
|
|
152
315
|
await new Promise((resolve) => setTimeout(resolve, 1000 * retryCount));
|
|
153
316
|
} else {
|
|
@@ -157,64 +320,64 @@ export class MongoCompactor {
|
|
|
157
320
|
}
|
|
158
321
|
}
|
|
159
322
|
|
|
160
|
-
|
|
323
|
+
protected async compactSingleBucket(bucket: string, definitionId: BucketDefinitionId | null = null) {
|
|
161
324
|
const idLimitBytes = this.idLimitBytes;
|
|
162
|
-
|
|
163
|
-
|
|
325
|
+
const bucketContext = await this.getBucketDataContext(bucket, definitionId);
|
|
326
|
+
if (bucketContext == null) {
|
|
327
|
+
return;
|
|
328
|
+
}
|
|
329
|
+
const currentState: CurrentBucketState = {
|
|
164
330
|
bucket,
|
|
331
|
+
definitionId: bucketContext.key.definitionId,
|
|
165
332
|
seen: new Map(),
|
|
166
333
|
trackingSize: 0,
|
|
167
334
|
lastNotPut: null,
|
|
168
335
|
opsSincePut: 0,
|
|
169
|
-
|
|
170
336
|
checksum: 0,
|
|
171
337
|
opCount: 0,
|
|
172
338
|
opBytes: 0
|
|
173
339
|
};
|
|
174
340
|
|
|
175
|
-
// Constant lower bound
|
|
176
|
-
const lowerBound
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
o: new mongo.MinKey() as any
|
|
180
|
-
};
|
|
181
|
-
|
|
182
|
-
// Upper bound is adjusted for each batch
|
|
183
|
-
let upperBound: BucketDataKey = {
|
|
184
|
-
g: this.group_id,
|
|
185
|
-
b: bucket,
|
|
186
|
-
o: new mongo.MaxKey() as any
|
|
187
|
-
};
|
|
341
|
+
// Constant lower bound.
|
|
342
|
+
const lowerBound = bucketContext.minId;
|
|
343
|
+
// Upper bound is adjusted for each batch.
|
|
344
|
+
let upperBound = bucketContext.maxId;
|
|
188
345
|
|
|
189
346
|
while (true) {
|
|
190
347
|
this.signal?.throwIfAborted();
|
|
191
348
|
|
|
192
|
-
// Query one batch at a time, to avoid cursor timeouts
|
|
193
|
-
const
|
|
194
|
-
|
|
195
|
-
{
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
{ $limit: this.moveBatchQueryLimit },
|
|
205
|
-
{
|
|
206
|
-
$project: {
|
|
207
|
-
_id: 1,
|
|
208
|
-
op: 1,
|
|
209
|
-
table: 1,
|
|
210
|
-
row_id: 1,
|
|
211
|
-
source_table: 1,
|
|
212
|
-
source_key: 1,
|
|
213
|
-
checksum: 1,
|
|
214
|
-
size: { $bsonSize: '$$ROOT' }
|
|
215
|
-
}
|
|
349
|
+
// Query one batch at a time, to avoid cursor timeouts.
|
|
350
|
+
const pipeline = [
|
|
351
|
+
{
|
|
352
|
+
$match: {
|
|
353
|
+
_id: {
|
|
354
|
+
$gte: lowerBound,
|
|
355
|
+
$lt: upperBound
|
|
356
|
+
},
|
|
357
|
+
// Workaround for a clustered collection bug where the $lt operator may include upperBound.
|
|
358
|
+
// Technically only needed for storage V3.
|
|
359
|
+
// https://jira.mongodb.org/browse/SERVER-121822
|
|
360
|
+
'_id.o': { $lt: upperBound.o }
|
|
216
361
|
}
|
|
217
|
-
|
|
362
|
+
},
|
|
363
|
+
{ $sort: { _id: -1 } },
|
|
364
|
+
{ $limit: this.moveBatchQueryLimit },
|
|
365
|
+
{
|
|
366
|
+
$project: {
|
|
367
|
+
_id: 1,
|
|
368
|
+
op: 1,
|
|
369
|
+
table: 1,
|
|
370
|
+
row_id: 1,
|
|
371
|
+
source_table: 1,
|
|
372
|
+
source_key: 1,
|
|
373
|
+
checksum: 1,
|
|
374
|
+
size: { $bsonSize: '$$ROOT' }
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
];
|
|
378
|
+
|
|
379
|
+
const cursor = bucketContext.collection.aggregate<BucketDataDocumentGeneric & { size: number | bigint }>(
|
|
380
|
+
pipeline,
|
|
218
381
|
{
|
|
219
382
|
// batchSize is 1 more than limit to auto-close the cursor.
|
|
220
383
|
// See https://github.com/mongodb/node-mongodb-native/pull/4580
|
|
@@ -223,18 +386,25 @@ export class MongoCompactor {
|
|
|
223
386
|
);
|
|
224
387
|
// We don't limit to a single batch here, since that often causes MongoDB to scan through more than it returns.
|
|
225
388
|
// Instead, we load up to the limit.
|
|
226
|
-
const
|
|
389
|
+
const rawBatch = await cursor.toArray();
|
|
390
|
+
const batch = rawBatch.map((document) => {
|
|
391
|
+
const { size, ...rest } = document;
|
|
392
|
+
return {
|
|
393
|
+
doc: bucketContext.fromPersistedDocument(rest),
|
|
394
|
+
size
|
|
395
|
+
};
|
|
396
|
+
});
|
|
227
397
|
|
|
228
398
|
if (batch.length == 0) {
|
|
229
|
-
// We've reached the end
|
|
399
|
+
// We've reached the end.
|
|
230
400
|
break;
|
|
231
401
|
}
|
|
232
402
|
|
|
233
|
-
//
|
|
234
|
-
upperBound =
|
|
403
|
+
// Reuse the exact collection _id value from Mongo for the next bound.
|
|
404
|
+
upperBound = rawBatch[rawBatch.length - 1]._id;
|
|
235
405
|
|
|
236
|
-
for (
|
|
237
|
-
if (doc.
|
|
406
|
+
for (const { doc, size } of batch) {
|
|
407
|
+
if (doc.o > this.maxOpId) {
|
|
238
408
|
continue;
|
|
239
409
|
}
|
|
240
410
|
|
|
@@ -243,19 +413,17 @@ export class MongoCompactor {
|
|
|
243
413
|
|
|
244
414
|
let isPersistentPut = doc.op == 'PUT';
|
|
245
415
|
|
|
246
|
-
currentState.opBytes += Number(
|
|
416
|
+
currentState.opBytes += Number(size);
|
|
247
417
|
if (doc.op == 'REMOVE' || doc.op == 'PUT') {
|
|
248
418
|
const key = `${doc.table}/${doc.row_id}/${cacheKey(doc.source_table!, doc.source_key!)}`;
|
|
249
419
|
const targetOp = currentState.seen.get(key);
|
|
250
420
|
if (targetOp) {
|
|
251
|
-
// Will convert to MOVE, so don't count as PUT
|
|
421
|
+
// Will convert to MOVE, so don't count as PUT.
|
|
252
422
|
isPersistentPut = false;
|
|
253
423
|
|
|
254
424
|
this.updates.push({
|
|
255
425
|
updateOne: {
|
|
256
|
-
filter: {
|
|
257
|
-
_id: doc._id
|
|
258
|
-
},
|
|
426
|
+
filter: { _id: bucketContext.docId(doc.o) },
|
|
259
427
|
update: {
|
|
260
428
|
$set: {
|
|
261
429
|
op: 'MOVE',
|
|
@@ -268,24 +436,20 @@ export class MongoCompactor {
|
|
|
268
436
|
row_id: 1,
|
|
269
437
|
data: 1
|
|
270
438
|
}
|
|
271
|
-
}
|
|
439
|
+
} satisfies mongo.UpdateFilter<BucketDataDocumentGeneric>
|
|
272
440
|
}
|
|
273
441
|
});
|
|
274
442
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
// 50 for map overhead
|
|
286
|
-
// 50 for additional overhead
|
|
287
|
-
currentState.trackingSize += key.length + 140;
|
|
288
|
-
}
|
|
443
|
+
// TODO: better estimate for this.
|
|
444
|
+
currentState.opBytes += 200 - Number(size);
|
|
445
|
+
} else if (currentState.trackingSize < idLimitBytes) {
|
|
446
|
+
// flatstr reduces the memory usage by flattening the string.
|
|
447
|
+
currentState.seen.set(utils.flatstr(key), doc.o);
|
|
448
|
+
// length + 16 for the string
|
|
449
|
+
// 24 for the bigint
|
|
450
|
+
// 50 for map overhead
|
|
451
|
+
// 50 for additional overhead
|
|
452
|
+
currentState.trackingSize += key.length + 140;
|
|
289
453
|
}
|
|
290
454
|
}
|
|
291
455
|
|
|
@@ -294,41 +458,37 @@ export class MongoCompactor {
|
|
|
294
458
|
currentState.opsSincePut = 0;
|
|
295
459
|
} else if (doc.op != 'CLEAR') {
|
|
296
460
|
if (currentState.lastNotPut == null) {
|
|
297
|
-
currentState.lastNotPut = doc.
|
|
461
|
+
currentState.lastNotPut = doc.o;
|
|
298
462
|
}
|
|
299
463
|
currentState.opsSincePut += 1;
|
|
300
464
|
}
|
|
301
465
|
|
|
302
466
|
if (this.updates.length + this.bucketStateUpdates.length >= this.moveBatchLimit) {
|
|
303
|
-
await this.flush();
|
|
467
|
+
await this.flush(bucketContext);
|
|
304
468
|
}
|
|
305
469
|
}
|
|
306
470
|
|
|
307
|
-
logger.info(`Processed batch of length ${batch.length} current bucket: ${bucket}`);
|
|
471
|
+
this.logger.info(`Processed batch of length ${batch.length} current bucket: ${bucket}`);
|
|
308
472
|
}
|
|
309
473
|
|
|
310
|
-
// Free memory before clearing bucket
|
|
474
|
+
// Free memory before clearing the bucket.
|
|
311
475
|
currentState.seen.clear();
|
|
312
476
|
if (currentState.lastNotPut != null && currentState.opsSincePut >= 1) {
|
|
313
|
-
logger.info(
|
|
477
|
+
this.logger.info(
|
|
314
478
|
`Inserting CLEAR at ${this.group_id}:${bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
|
|
315
479
|
);
|
|
316
|
-
// Need flush() before clear()
|
|
317
|
-
await this.flush();
|
|
318
|
-
await this.clearBucket(currentState);
|
|
480
|
+
// Need flush() before clear().
|
|
481
|
+
await this.flush(bucketContext);
|
|
482
|
+
await this.clearBucket(currentState, bucketContext);
|
|
319
483
|
}
|
|
320
484
|
|
|
321
|
-
// Do this
|
|
485
|
+
// Do this after clearBucket so we have accurate counts.
|
|
322
486
|
this.updateBucketChecksums(currentState);
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
await this.flush();
|
|
487
|
+
// Need another flush after updateBucketChecksums().
|
|
488
|
+
await this.flush(bucketContext);
|
|
326
489
|
}
|
|
327
490
|
|
|
328
|
-
|
|
329
|
-
* Call when done with a bucket.
|
|
330
|
-
*/
|
|
331
|
-
private updateBucketChecksums(state: CurrentBucketState) {
|
|
491
|
+
protected updateBucketChecksums(state: CurrentBucketState) {
|
|
332
492
|
if (state.opCount < 0) {
|
|
333
493
|
throw new ServiceAssertionError(
|
|
334
494
|
`Invalid opCount: ${state.opCount} checksum ${state.checksum} opsSincePut: ${state.opsSincePut} maxOpId: ${this.maxOpId}`
|
|
@@ -336,12 +496,7 @@ export class MongoCompactor {
|
|
|
336
496
|
}
|
|
337
497
|
this.bucketStateUpdates.push({
|
|
338
498
|
updateOne: {
|
|
339
|
-
filter:
|
|
340
|
-
_id: {
|
|
341
|
-
g: this.group_id,
|
|
342
|
-
b: state.bucket
|
|
343
|
-
}
|
|
344
|
-
},
|
|
499
|
+
filter: this.bucketStateFilter(state.bucket, state.definitionId),
|
|
345
500
|
update: {
|
|
346
501
|
$set: {
|
|
347
502
|
compacted_state: {
|
|
@@ -351,14 +506,13 @@ export class MongoCompactor {
|
|
|
351
506
|
bytes: state.opBytes
|
|
352
507
|
},
|
|
353
508
|
estimate_since_compact: {
|
|
354
|
-
//
|
|
355
|
-
// which we don't currently cater for.
|
|
356
|
-
// We could potentially query for that, but that could add overhead.
|
|
509
|
+
// There could have been a whole bunch of new operations added to the bucket while compacting,
|
|
510
|
+
// which we don't currently cater for. We could potentially query for that, but that adds overhead.
|
|
357
511
|
count: 0,
|
|
358
512
|
bytes: 0
|
|
359
513
|
}
|
|
360
514
|
}
|
|
361
|
-
}
|
|
515
|
+
} satisfies mongo.UpdateFilter<BucketStateDocumentBase>,
|
|
362
516
|
// We generally expect this to have been created before.
|
|
363
517
|
// We don't create new ones here, to avoid issues with the unique index on bucket_updates.
|
|
364
518
|
upsert: false
|
|
@@ -366,23 +520,24 @@ export class MongoCompactor {
|
|
|
366
520
|
});
|
|
367
521
|
}
|
|
368
522
|
|
|
369
|
-
|
|
523
|
+
protected async flush(col: SingleBucketStore) {
|
|
370
524
|
if (this.updates.length > 0) {
|
|
371
|
-
logger.info(`Compacting ${this.updates.length} ops`);
|
|
372
|
-
await
|
|
373
|
-
// Order is not important.
|
|
374
|
-
//
|
|
375
|
-
// and it's fine if the operations are partially applied.
|
|
376
|
-
// Each individual operation is atomic.
|
|
525
|
+
this.logger.info(`Compacting ${this.updates.length} ops`);
|
|
526
|
+
await col.collection.bulkWrite(this.updates, {
|
|
527
|
+
// Order is not important. Since checksums are not affected, these operations can happen in any order,
|
|
528
|
+
// and it's fine if the operations are partially applied. Each individual operation is atomic.
|
|
377
529
|
ordered: false
|
|
378
530
|
});
|
|
379
531
|
this.updates = [];
|
|
380
532
|
}
|
|
533
|
+
|
|
534
|
+
await this.flushBucketStateUpdates();
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
private async flushBucketStateUpdates() {
|
|
381
538
|
if (this.bucketStateUpdates.length > 0) {
|
|
382
|
-
logger.info(`Updating ${this.bucketStateUpdates.length} bucket states`);
|
|
383
|
-
await this.
|
|
384
|
-
ordered: false
|
|
385
|
-
});
|
|
539
|
+
this.logger.info(`Updating ${this.bucketStateUpdates.length} bucket states`);
|
|
540
|
+
await this.writeBucketStateUpdates();
|
|
386
541
|
this.bucketStateUpdates = [];
|
|
387
542
|
}
|
|
388
543
|
}
|
|
@@ -390,26 +545,15 @@ export class MongoCompactor {
|
|
|
390
545
|
/**
|
|
391
546
|
* Perform a CLEAR compact for a bucket.
|
|
392
547
|
*
|
|
393
|
-
*
|
|
394
|
-
* @param bucket bucket name
|
|
395
|
-
* @param op op_id of the last non-PUT operation, which will be converted to CLEAR.
|
|
548
|
+
* @param currentState tracks the last non-PUT op, which will be converted to CLEAR.
|
|
396
549
|
*/
|
|
397
|
-
|
|
398
|
-
const bucket = currentState.bucket;
|
|
550
|
+
protected async clearBucket(currentState: CurrentBucketState, col: SingleBucketStore) {
|
|
399
551
|
const clearOp = currentState.lastNotPut!;
|
|
400
552
|
|
|
401
553
|
const opFilter = {
|
|
402
554
|
_id: {
|
|
403
|
-
$gte:
|
|
404
|
-
|
|
405
|
-
b: bucket,
|
|
406
|
-
o: new mongo.MinKey() as any
|
|
407
|
-
},
|
|
408
|
-
$lte: {
|
|
409
|
-
g: this.group_id,
|
|
410
|
-
b: bucket,
|
|
411
|
-
o: clearOp
|
|
412
|
-
}
|
|
555
|
+
$gte: col.minId,
|
|
556
|
+
$lte: col.docId(clearOp)
|
|
413
557
|
}
|
|
414
558
|
};
|
|
415
559
|
|
|
@@ -424,39 +568,40 @@ export class MongoCompactor {
|
|
|
424
568
|
// We need a transaction per batch to make sure checksums stay consistent.
|
|
425
569
|
await session.withTransaction(
|
|
426
570
|
async () => {
|
|
427
|
-
const query =
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
_id: 1,
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
571
|
+
const query = col.collection.find<Pick<BucketDataDocumentGeneric, '_id' | CompactClearProperties>>(
|
|
572
|
+
opFilter,
|
|
573
|
+
{
|
|
574
|
+
session,
|
|
575
|
+
sort: { _id: 1 },
|
|
576
|
+
projection: {
|
|
577
|
+
_id: 1,
|
|
578
|
+
op: 1,
|
|
579
|
+
checksum: 1,
|
|
580
|
+
target_op: 1
|
|
581
|
+
},
|
|
582
|
+
limit: this.clearBatchLimit
|
|
583
|
+
}
|
|
584
|
+
);
|
|
438
585
|
let checksum = 0;
|
|
439
|
-
let
|
|
586
|
+
let lastOp: Pick<BucketDataDoc, 'o' | CompactClearProperties> | null = null;
|
|
440
587
|
let targetOp: bigint | null = null;
|
|
441
588
|
let gotAnOp = false;
|
|
442
589
|
let numberOfOpsToClear = 0;
|
|
443
|
-
for await (
|
|
590
|
+
for await (const rawOp of query.stream()) {
|
|
591
|
+
const op = col.fromPartialPersistedDocument(rawOp);
|
|
592
|
+
|
|
444
593
|
if (op.op == 'MOVE' || op.op == 'REMOVE' || op.op == 'CLEAR') {
|
|
445
594
|
checksum = utils.addChecksums(checksum, Number(op.checksum));
|
|
446
|
-
|
|
595
|
+
lastOp = op;
|
|
447
596
|
numberOfOpsToClear += 1;
|
|
448
597
|
if (op.op != 'CLEAR') {
|
|
449
598
|
gotAnOp = true;
|
|
450
599
|
}
|
|
451
|
-
if (op.target_op != null) {
|
|
452
|
-
|
|
453
|
-
targetOp = op.target_op;
|
|
454
|
-
}
|
|
600
|
+
if (op.target_op != null && (targetOp == null || op.target_op > targetOp)) {
|
|
601
|
+
targetOp = op.target_op;
|
|
455
602
|
}
|
|
456
603
|
} else {
|
|
457
|
-
throw new ReplicationAssertionError(
|
|
458
|
-
`Unexpected ${op.op} operation at ${op._id.g}:${op._id.b}:${op._id.o}`
|
|
459
|
-
);
|
|
604
|
+
throw new ReplicationAssertionError(`Unexpected ${op.op} operation at ${this.formatBucketDataKey(op)}`);
|
|
460
605
|
}
|
|
461
606
|
}
|
|
462
607
|
if (!gotAnOp) {
|
|
@@ -464,31 +609,25 @@ export class MongoCompactor {
|
|
|
464
609
|
return;
|
|
465
610
|
}
|
|
466
611
|
|
|
467
|
-
logger.info(`Flushing CLEAR for ${numberOfOpsToClear} ops at ${
|
|
468
|
-
await
|
|
612
|
+
this.logger.info(`Flushing CLEAR for ${numberOfOpsToClear} ops at ${lastOp?.o}`);
|
|
613
|
+
await col.collection.deleteMany(
|
|
469
614
|
{
|
|
470
615
|
_id: {
|
|
471
|
-
$gte:
|
|
472
|
-
|
|
473
|
-
b: bucket,
|
|
474
|
-
o: new mongo.MinKey() as any
|
|
475
|
-
},
|
|
476
|
-
$lte: lastOpId!
|
|
616
|
+
$gte: col.minId,
|
|
617
|
+
$lte: col.docId(lastOp!.o)
|
|
477
618
|
}
|
|
478
619
|
},
|
|
479
620
|
{ session }
|
|
480
621
|
);
|
|
481
622
|
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
{ session }
|
|
491
|
-
);
|
|
623
|
+
const op = col.toPersistedDocument({
|
|
624
|
+
o: lastOp!.o,
|
|
625
|
+
op: 'CLEAR',
|
|
626
|
+
checksum: BigInt(checksum),
|
|
627
|
+
data: null,
|
|
628
|
+
target_op: targetOp
|
|
629
|
+
});
|
|
630
|
+
await col.collection.insertOne(op, { session });
|
|
492
631
|
|
|
493
632
|
opCountDiff = -numberOfOpsToClear + 1;
|
|
494
633
|
},
|
|
@@ -497,7 +636,7 @@ export class MongoCompactor {
|
|
|
497
636
|
readConcern: { level: 'snapshot' }
|
|
498
637
|
}
|
|
499
638
|
);
|
|
500
|
-
// Update
|
|
639
|
+
// Update outside the transaction, since the transaction can be retried multiple times.
|
|
501
640
|
currentState.opCount += opCountDiff;
|
|
502
641
|
}
|
|
503
642
|
} finally {
|
|
@@ -505,211 +644,22 @@ export class MongoCompactor {
|
|
|
505
644
|
}
|
|
506
645
|
}
|
|
507
646
|
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
async populateChecksums(options: { minBucketChanges: number }): Promise<PopulateChecksumCacheResults> {
|
|
512
|
-
let count = 0;
|
|
513
|
-
while (true) {
|
|
514
|
-
this.signal?.throwIfAborted();
|
|
515
|
-
const buckets = await this.dirtyBucketBatchForChecksums(options);
|
|
516
|
-
if (buckets.length == 0) {
|
|
517
|
-
// All done
|
|
518
|
-
break;
|
|
519
|
-
}
|
|
520
|
-
this.signal?.throwIfAborted();
|
|
521
|
-
|
|
522
|
-
const start = Date.now();
|
|
523
|
-
|
|
524
|
-
// Filter batch by estimated bucket size, to reduce possibility of timeouts
|
|
525
|
-
let checkBuckets: typeof buckets = [];
|
|
526
|
-
let totalCountEstimate = 0;
|
|
527
|
-
for (let bucket of buckets) {
|
|
528
|
-
checkBuckets.push(bucket);
|
|
529
|
-
totalCountEstimate += bucket.estimatedCount;
|
|
530
|
-
if (totalCountEstimate > 50_000) {
|
|
531
|
-
break;
|
|
532
|
-
}
|
|
533
|
-
}
|
|
534
|
-
logger.info(
|
|
535
|
-
`Calculating checksums for batch of ${buckets.length} buckets, estimated count of ${totalCountEstimate}`
|
|
536
|
-
);
|
|
537
|
-
await this.updateChecksumsBatch(checkBuckets.map((b) => b.bucket));
|
|
538
|
-
logger.info(`Updated checksums for batch of ${checkBuckets.length} buckets in ${Date.now() - start}ms`);
|
|
539
|
-
count += checkBuckets.length;
|
|
540
|
-
}
|
|
541
|
-
return { buckets: count };
|
|
542
|
-
}
|
|
543
|
-
|
|
544
|
-
/**
|
|
545
|
-
* Return batches of dirty buckets.
|
|
546
|
-
*
|
|
547
|
-
* Can be used to iterate through all buckets.
|
|
548
|
-
*
|
|
549
|
-
* minBucketChanges: minimum number of changes for a bucket to be included in the results.
|
|
550
|
-
* minChangeRatio: minimum ratio of changes to total ops for a bucket to be included in the results, number between 0 and 1.
|
|
551
|
-
*/
|
|
552
|
-
private async *dirtyBucketBatches(options: {
|
|
553
|
-
minBucketChanges: number;
|
|
554
|
-
minChangeRatio: number;
|
|
555
|
-
}): AsyncGenerator<{ bucket: string; estimatedCount: number }[]> {
|
|
556
|
-
// Previously, we used an index on {_id.g: 1, estimate_since_compact.count: 1} to only buckets with changes.
|
|
557
|
-
// This works well if there are only a small number of buckets with changes.
|
|
558
|
-
// However, if buckets are continuosly modified while we are compacting, we get the same buckets over and over again.
|
|
559
|
-
// This has caused the compact process to re-read the same collection around 5x times in total, which is very inefficient.
|
|
560
|
-
// To solve this, we now just iterate through all buckets, and filter out the ones with low changes.
|
|
561
|
-
|
|
562
|
-
if (options.minBucketChanges <= 0) {
|
|
563
|
-
throw new ReplicationAssertionError('minBucketChanges must be >= 1');
|
|
564
|
-
}
|
|
565
|
-
let lastId = { g: this.group_id, b: new mongo.MinKey() as any };
|
|
566
|
-
const maxId = { g: this.group_id, b: new mongo.MaxKey() as any };
|
|
567
|
-
while (true) {
|
|
568
|
-
// To avoid timeouts from too many buckets not meeting the minBucketChanges criteria, we use an aggregation pipeline
|
|
569
|
-
// to scan a fixed batch of buckets at a time, but only return buckets that meet the criteria, rather than limiting
|
|
570
|
-
// on the output number.
|
|
571
|
-
const [result] = await this.db.bucket_state
|
|
572
|
-
.aggregate<{
|
|
573
|
-
buckets: Pick<BucketStateDocument, '_id' | 'estimate_since_compact' | 'compacted_state'>[];
|
|
574
|
-
cursor: Pick<BucketStateDocument, '_id'>[];
|
|
575
|
-
}>(
|
|
576
|
-
[
|
|
577
|
-
{
|
|
578
|
-
$match: {
|
|
579
|
-
_id: { $gt: lastId, $lt: maxId }
|
|
580
|
-
}
|
|
581
|
-
},
|
|
582
|
-
{
|
|
583
|
-
$sort: { _id: 1 }
|
|
584
|
-
},
|
|
585
|
-
{
|
|
586
|
-
// Scan a fixed number of docs each query so sparse matches don't block progress.
|
|
587
|
-
$limit: DIRTY_BUCKET_SCAN_BATCH_SIZE
|
|
588
|
-
},
|
|
589
|
-
{
|
|
590
|
-
$facet: {
|
|
591
|
-
// This is the results for the batch
|
|
592
|
-
buckets: [
|
|
593
|
-
{
|
|
594
|
-
$match: {
|
|
595
|
-
'estimate_since_compact.count': { $gte: options.minBucketChanges }
|
|
596
|
-
}
|
|
597
|
-
},
|
|
598
|
-
{
|
|
599
|
-
$project: {
|
|
600
|
-
_id: 1,
|
|
601
|
-
estimate_since_compact: 1,
|
|
602
|
-
compacted_state: 1
|
|
603
|
-
}
|
|
604
|
-
}
|
|
605
|
-
],
|
|
606
|
-
// This is used for the next query.
|
|
607
|
-
cursor: [{ $sort: { _id: -1 } }, { $limit: 1 }, { $project: { _id: 1 } }]
|
|
608
|
-
}
|
|
609
|
-
}
|
|
610
|
-
],
|
|
611
|
-
{ maxTimeMS: MONGO_OPERATION_TIMEOUT_MS }
|
|
612
|
-
)
|
|
613
|
-
.toArray();
|
|
614
|
-
|
|
615
|
-
const cursor = result?.cursor?.[0];
|
|
616
|
-
if (cursor == null) {
|
|
617
|
-
break;
|
|
618
|
-
}
|
|
619
|
-
lastId = cursor._id;
|
|
620
|
-
|
|
621
|
-
const mapped = (result?.buckets ?? []).map((b) => {
|
|
622
|
-
// The numbers, specifically the bytes, could be a bigint. We convert to Number to allow calculating the ratios.
|
|
623
|
-
// BigInt precision is not needed here since it's just an estimate.
|
|
624
|
-
const updatedCount = b.estimate_since_compact?.count ?? 0;
|
|
625
|
-
const totalCount = (b.compacted_state?.count ?? 0) + updatedCount;
|
|
626
|
-
const updatedBytes = Number(b.estimate_since_compact?.bytes ?? 0);
|
|
627
|
-
const totalBytes = Number(b.compacted_state?.bytes ?? 0) + updatedBytes;
|
|
628
|
-
const dirtyChangeNumber = totalCount > 0 ? updatedCount / totalCount : 0;
|
|
629
|
-
const dirtyChangeBytes = totalBytes > 0 ? updatedBytes / totalBytes : 0;
|
|
630
|
-
return {
|
|
631
|
-
bucket: b._id.b,
|
|
632
|
-
estimatedCount: totalCount,
|
|
633
|
-
dirtyRatio: Math.max(dirtyChangeNumber, dirtyChangeBytes)
|
|
634
|
-
};
|
|
635
|
-
});
|
|
636
|
-
const filtered = mapped.filter(
|
|
637
|
-
(b) => b.estimatedCount >= options.minBucketChanges && b.dirtyRatio >= options.minChangeRatio
|
|
638
|
-
);
|
|
639
|
-
yield filtered;
|
|
640
|
-
}
|
|
641
|
-
}
|
|
642
|
-
|
|
643
|
-
/**
|
|
644
|
-
* Returns a batch of dirty buckets - buckets with most changes first.
|
|
645
|
-
*
|
|
646
|
-
* This cannot be used to iterate on its own - the client is expected to process these buckets and
|
|
647
|
-
* set estimate_since_compact.count: 0 when done, before fetching the next batch.
|
|
648
|
-
*
|
|
649
|
-
* Unlike dirtyBucketBatches, used for compacting, this is specifically designed to be resuamble after a restart,
|
|
650
|
-
* since it is used as the last step for initial replication.
|
|
651
|
-
*
|
|
652
|
-
* We currently don't get new data while doing populateChecksums, so we don't need to worry about buckets changing while processing.
|
|
653
|
-
*/
|
|
654
|
-
private async dirtyBucketBatchForChecksums(options: {
|
|
655
|
-
minBucketChanges: number;
|
|
656
|
-
}): Promise<{ bucket: string; estimatedCount: number }[]> {
|
|
657
|
-
if (options.minBucketChanges <= 0) {
|
|
658
|
-
throw new ReplicationAssertionError('minBucketChanges must be >= 1');
|
|
659
|
-
}
|
|
660
|
-
// We make use of an index on {_id.g: 1, 'estimate_since_compact.count': -1}
|
|
661
|
-
const dirtyBuckets = await this.db.bucket_state
|
|
662
|
-
.find(
|
|
663
|
-
{
|
|
664
|
-
'_id.g': this.group_id,
|
|
665
|
-
'estimate_since_compact.count': { $gte: options.minBucketChanges }
|
|
666
|
-
},
|
|
667
|
-
{
|
|
668
|
-
projection: {
|
|
669
|
-
_id: 1,
|
|
670
|
-
estimate_since_compact: 1,
|
|
671
|
-
compacted_state: 1
|
|
672
|
-
},
|
|
673
|
-
sort: {
|
|
674
|
-
'estimate_since_compact.count': -1
|
|
675
|
-
},
|
|
676
|
-
limit: 200,
|
|
677
|
-
maxTimeMS: MONGO_OPERATION_TIMEOUT_MS
|
|
678
|
-
}
|
|
679
|
-
)
|
|
680
|
-
.toArray();
|
|
647
|
+
protected async updateChecksumsBatch(buckets: Pick<DirtyBucket, 'bucket' | 'definitionId'>[]) {
|
|
648
|
+
const checksums = await this.computeChecksumsForBuckets(buckets);
|
|
649
|
+
const definitionIdByBucket = new Map(buckets.map((bucket) => [bucket.bucket, bucket.definitionId]));
|
|
681
650
|
|
|
682
|
-
|
|
683
|
-
bucket: bucket._id.b,
|
|
684
|
-
estimatedCount: Number(bucket.estimate_since_compact!.count) + Number(bucket.compacted_state?.count ?? 0)
|
|
685
|
-
}));
|
|
686
|
-
}
|
|
687
|
-
|
|
688
|
-
private async updateChecksumsBatch(buckets: string[]) {
|
|
689
|
-
const checksums = await this.storage.checksums.computePartialChecksumsDirect(
|
|
690
|
-
buckets.map((bucket) => {
|
|
691
|
-
return {
|
|
692
|
-
bucket,
|
|
693
|
-
source: {} as any,
|
|
694
|
-
end: this.maxOpId
|
|
695
|
-
};
|
|
696
|
-
})
|
|
697
|
-
);
|
|
698
|
-
|
|
699
|
-
for (let bucketChecksum of checksums.values()) {
|
|
651
|
+
for (const bucketChecksum of checksums.values()) {
|
|
700
652
|
if (isPartialChecksum(bucketChecksum)) {
|
|
701
|
-
// Should never happen since we don't specify `start
|
|
653
|
+
// Should never happen since we don't specify `start`.
|
|
702
654
|
throw new ServiceAssertionError(`Full checksum expected, got ${JSON.stringify(bucketChecksum)}`);
|
|
703
655
|
}
|
|
704
656
|
|
|
705
657
|
this.bucketStateUpdates.push({
|
|
706
658
|
updateOne: {
|
|
707
|
-
filter:
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
}
|
|
712
|
-
},
|
|
659
|
+
filter: this.bucketStateFilter(
|
|
660
|
+
bucketChecksum.bucket,
|
|
661
|
+
definitionIdByBucket.get(bucketChecksum.bucket) ?? null
|
|
662
|
+
),
|
|
713
663
|
update: {
|
|
714
664
|
$set: {
|
|
715
665
|
compacted_state: {
|
|
@@ -723,14 +673,34 @@ export class MongoCompactor {
|
|
|
723
673
|
bytes: 0
|
|
724
674
|
}
|
|
725
675
|
}
|
|
726
|
-
}
|
|
727
|
-
// We don't create new ones here - it gets tricky to get the last_op right with the unique index on
|
|
728
|
-
// bucket_updates
|
|
676
|
+
} satisfies mongo.UpdateFilter<BucketStateDocumentBase>,
|
|
677
|
+
// We don't create new ones here - it gets tricky to get the last_op right with the unique index on
|
|
678
|
+
// bucket_updates.
|
|
729
679
|
upsert: false
|
|
730
680
|
}
|
|
731
681
|
});
|
|
732
682
|
}
|
|
733
683
|
|
|
734
|
-
await this.
|
|
684
|
+
await this.flushBucketStateUpdates();
|
|
685
|
+
}
|
|
686
|
+
|
|
687
|
+
protected formatBucketDataKey(doc: Pick<BucketDataDoc, 'bucketKey' | 'o'>) {
|
|
688
|
+
return `${doc.bucketKey.replicationStreamId}:${doc.bucketKey.bucket}:${doc.o}`;
|
|
735
689
|
}
|
|
690
|
+
|
|
691
|
+
protected abstract writeBucketStateUpdates(): Promise<void>;
|
|
692
|
+
protected abstract computeChecksumsForBuckets(
|
|
693
|
+
buckets: Pick<DirtyBucket, 'bucket' | 'definitionId'>[]
|
|
694
|
+
): Promise<storage.PartialChecksumMap>;
|
|
695
|
+
protected abstract bucketStateFilter(bucket: string, definitionId: BucketDefinitionId | null): mongo.Document;
|
|
696
|
+
|
|
697
|
+
protected abstract getBucketDataContext(
|
|
698
|
+
bucket: string,
|
|
699
|
+
definitionId: BucketDefinitionId | null
|
|
700
|
+
): Promise<SingleBucketStore | null>;
|
|
701
|
+
}
|
|
702
|
+
|
|
703
|
+
export interface BucketDataCollectionContext<TBucketData extends mongo.Document> {
|
|
704
|
+
bucketKey: BucketKey;
|
|
705
|
+
collection: mongo.Collection<TBucketData>;
|
|
736
706
|
}
|