@powersync/service-module-mongodb-storage 0.15.3 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +54 -0
- package/dist/migrations/db/migrations/1688556755264-initial-sync-rules.js +1 -1
- package/dist/migrations/db/migrations/1688556755264-initial-sync-rules.js.map +1 -1
- package/dist/migrations/db/migrations/1702295701188-sync-rule-state.js +3 -3
- package/dist/migrations/db/migrations/1702295701188-sync-rule-state.js.map +1 -1
- package/dist/migrations/db/migrations/1770213298299-storage-version.js.map +1 -1
- package/dist/storage/MongoBucketStorage.d.ts +5 -3
- package/dist/storage/MongoBucketStorage.js +50 -36
- package/dist/storage/MongoBucketStorage.js.map +1 -1
- package/dist/storage/MongoReportStorage.js.map +1 -1
- package/dist/storage/implementation/BucketDefinitionMapping.d.ts +17 -0
- package/dist/storage/implementation/BucketDefinitionMapping.js +58 -0
- package/dist/storage/implementation/BucketDefinitionMapping.js.map +1 -0
- package/dist/storage/implementation/MongoBucketBatch.d.ts +16 -14
- package/dist/storage/implementation/MongoBucketBatch.js +80 -115
- package/dist/storage/implementation/MongoBucketBatch.js.map +1 -1
- package/dist/storage/implementation/MongoBucketBatchShared.d.ts +5 -0
- package/dist/storage/implementation/MongoBucketBatchShared.js +8 -0
- package/dist/storage/implementation/MongoBucketBatchShared.js.map +1 -0
- package/dist/storage/implementation/MongoChecksums.d.ts +28 -17
- package/dist/storage/implementation/MongoChecksums.js +13 -72
- package/dist/storage/implementation/MongoChecksums.js.map +1 -1
- package/dist/storage/implementation/MongoCompactor.d.ts +98 -58
- package/dist/storage/implementation/MongoCompactor.js +229 -296
- package/dist/storage/implementation/MongoCompactor.js.map +1 -1
- package/dist/storage/implementation/MongoParameterCompactor.d.ts +11 -6
- package/dist/storage/implementation/MongoParameterCompactor.js +11 -8
- package/dist/storage/implementation/MongoParameterCompactor.js.map +1 -1
- package/dist/storage/implementation/MongoPersistedSyncRules.d.ts +14 -0
- package/dist/storage/implementation/MongoPersistedSyncRules.js +64 -0
- package/dist/storage/implementation/MongoPersistedSyncRules.js.map +1 -0
- package/dist/storage/implementation/MongoPersistedSyncRulesContent.d.ts +3 -0
- package/dist/storage/implementation/MongoPersistedSyncRulesContent.js +9 -0
- package/dist/storage/implementation/MongoPersistedSyncRulesContent.js.map +1 -1
- package/dist/storage/implementation/MongoStorageProvider.js +1 -1
- package/dist/storage/implementation/MongoStorageProvider.js.map +1 -1
- package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +49 -30
- package/dist/storage/implementation/MongoSyncBucketStorage.js +96 -388
- package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
- package/dist/storage/implementation/MongoSyncRulesLock.d.ts +5 -3
- package/dist/storage/implementation/MongoSyncRulesLock.js +12 -10
- package/dist/storage/implementation/MongoSyncRulesLock.js.map +1 -1
- package/dist/storage/implementation/MongoWriteCheckpointAPI.js +1 -1
- package/dist/storage/implementation/MongoWriteCheckpointAPI.js.map +1 -1
- package/dist/storage/implementation/OperationBatch.js +1 -1
- package/dist/storage/implementation/common/BucketDataDoc.d.ts +35 -0
- package/dist/storage/implementation/common/BucketDataDoc.js +2 -0
- package/dist/storage/implementation/common/BucketDataDoc.js.map +1 -0
- package/dist/storage/implementation/common/MongoSyncBucketStorageContext.d.ts +13 -0
- package/dist/storage/implementation/common/MongoSyncBucketStorageContext.js +2 -0
- package/dist/storage/implementation/common/MongoSyncBucketStorageContext.js.map +1 -0
- package/dist/storage/implementation/common/PersistedBatch.d.ts +108 -0
- package/dist/storage/implementation/common/PersistedBatch.js +237 -0
- package/dist/storage/implementation/common/PersistedBatch.js.map +1 -0
- package/dist/storage/implementation/common/SingleBucketStore.d.ts +54 -0
- package/dist/storage/implementation/common/SingleBucketStore.js +3 -0
- package/dist/storage/implementation/common/SingleBucketStore.js.map +1 -0
- package/dist/storage/implementation/common/SourceRecordStore.d.ts +36 -0
- package/dist/storage/implementation/common/SourceRecordStore.js +2 -0
- package/dist/storage/implementation/common/SourceRecordStore.js.map +1 -0
- package/dist/storage/implementation/common/VersionedPowerSyncMongoBase.d.ts +27 -0
- package/dist/storage/implementation/common/VersionedPowerSyncMongoBase.js +57 -0
- package/dist/storage/implementation/common/VersionedPowerSyncMongoBase.js.map +1 -0
- package/dist/storage/implementation/createMongoSyncBucketStorage.d.ts +7 -0
- package/dist/storage/implementation/createMongoSyncBucketStorage.js +9 -0
- package/dist/storage/implementation/createMongoSyncBucketStorage.js.map +1 -0
- package/dist/storage/implementation/db.d.ts +34 -34
- package/dist/storage/implementation/db.js +78 -98
- package/dist/storage/implementation/db.js.map +1 -1
- package/dist/storage/implementation/models.d.ts +63 -34
- package/dist/storage/implementation/models.js +21 -2
- package/dist/storage/implementation/models.js.map +1 -1
- package/dist/storage/implementation/v1/MongoBucketBatchV1.d.ts +13 -0
- package/dist/storage/implementation/v1/MongoBucketBatchV1.js +22 -0
- package/dist/storage/implementation/v1/MongoBucketBatchV1.js.map +1 -0
- package/dist/storage/implementation/v1/MongoChecksumsV1.d.ts +12 -0
- package/dist/storage/implementation/v1/MongoChecksumsV1.js +56 -0
- package/dist/storage/implementation/v1/MongoChecksumsV1.js.map +1 -0
- package/dist/storage/implementation/v1/MongoCompactorV1.d.ts +23 -0
- package/dist/storage/implementation/v1/MongoCompactorV1.js +52 -0
- package/dist/storage/implementation/v1/MongoCompactorV1.js.map +1 -0
- package/dist/storage/implementation/v1/MongoParameterCompactorV1.d.ts +9 -0
- package/dist/storage/implementation/v1/MongoParameterCompactorV1.js +20 -0
- package/dist/storage/implementation/v1/MongoParameterCompactorV1.js.map +1 -0
- package/dist/storage/implementation/v1/MongoSyncBucketStorageV1.d.ts +41 -0
- package/dist/storage/implementation/v1/MongoSyncBucketStorageV1.js +283 -0
- package/dist/storage/implementation/v1/MongoSyncBucketStorageV1.js.map +1 -0
- package/dist/storage/implementation/v1/PersistedBatchV1.d.ts +26 -0
- package/dist/storage/implementation/v1/PersistedBatchV1.js +183 -0
- package/dist/storage/implementation/v1/PersistedBatchV1.js.map +1 -0
- package/dist/storage/implementation/v1/SingleBucketStoreV1.d.ts +18 -0
- package/dist/storage/implementation/v1/SingleBucketStoreV1.js +57 -0
- package/dist/storage/implementation/v1/SingleBucketStoreV1.js.map +1 -0
- package/dist/storage/implementation/v1/SourceRecordStoreV1.d.ts +19 -0
- package/dist/storage/implementation/v1/SourceRecordStoreV1.js +105 -0
- package/dist/storage/implementation/v1/SourceRecordStoreV1.js.map +1 -0
- package/dist/storage/implementation/v1/VersionedPowerSyncMongoV1.d.ts +12 -0
- package/dist/storage/implementation/v1/VersionedPowerSyncMongoV1.js +20 -0
- package/dist/storage/implementation/v1/VersionedPowerSyncMongoV1.js.map +1 -0
- package/dist/storage/implementation/v1/models.d.ts +34 -0
- package/dist/storage/implementation/v1/models.js +37 -0
- package/dist/storage/implementation/v1/models.js.map +1 -0
- package/dist/storage/implementation/v3/MongoBucketBatchV3.d.ts +13 -0
- package/dist/storage/implementation/v3/MongoBucketBatchV3.js +34 -0
- package/dist/storage/implementation/v3/MongoBucketBatchV3.js.map +1 -0
- package/dist/storage/implementation/v3/MongoChecksumsV3.d.ts +15 -0
- package/dist/storage/implementation/v3/MongoChecksumsV3.js +84 -0
- package/dist/storage/implementation/v3/MongoChecksumsV3.js.map +1 -0
- package/dist/storage/implementation/v3/MongoCompactorV3.d.ts +23 -0
- package/dist/storage/implementation/v3/MongoCompactorV3.js +68 -0
- package/dist/storage/implementation/v3/MongoCompactorV3.js.map +1 -0
- package/dist/storage/implementation/v3/MongoParameterCompactorV3.d.ts +9 -0
- package/dist/storage/implementation/v3/MongoParameterCompactorV3.js +18 -0
- package/dist/storage/implementation/v3/MongoParameterCompactorV3.js.map +1 -0
- package/dist/storage/implementation/v3/MongoParameterLookupV3.d.ts +5 -0
- package/dist/storage/implementation/v3/MongoParameterLookupV3.js +9 -0
- package/dist/storage/implementation/v3/MongoParameterLookupV3.js.map +1 -0
- package/dist/storage/implementation/v3/MongoSyncBucketStorageV3.d.ts +41 -0
- package/dist/storage/implementation/v3/MongoSyncBucketStorageV3.js +407 -0
- package/dist/storage/implementation/v3/MongoSyncBucketStorageV3.js.map +1 -0
- package/dist/storage/implementation/v3/PersistedBatchV3.d.ts +29 -0
- package/dist/storage/implementation/v3/PersistedBatchV3.js +259 -0
- package/dist/storage/implementation/v3/PersistedBatchV3.js.map +1 -0
- package/dist/storage/implementation/v3/SingleBucketStoreV3.d.ts +18 -0
- package/dist/storage/implementation/v3/SingleBucketStoreV3.js +48 -0
- package/dist/storage/implementation/v3/SingleBucketStoreV3.js.map +1 -0
- package/dist/storage/implementation/v3/SourceRecordStoreV3.d.ts +22 -0
- package/dist/storage/implementation/v3/SourceRecordStoreV3.js +164 -0
- package/dist/storage/implementation/v3/SourceRecordStoreV3.js.map +1 -0
- package/dist/storage/implementation/v3/VersionedPowerSyncMongoV3.d.ts +21 -0
- package/dist/storage/implementation/v3/VersionedPowerSyncMongoV3.js +71 -0
- package/dist/storage/implementation/v3/VersionedPowerSyncMongoV3.js.map +1 -0
- package/dist/storage/implementation/v3/models.d.ts +43 -0
- package/dist/storage/implementation/v3/models.js +34 -0
- package/dist/storage/implementation/v3/models.js.map +1 -0
- package/dist/storage/storage-index.d.ts +8 -5
- package/dist/storage/storage-index.js +8 -5
- package/dist/storage/storage-index.js.map +1 -1
- package/dist/utils/util.d.ts +11 -4
- package/dist/utils/util.js +25 -4
- package/dist/utils/util.js.map +1 -1
- package/package.json +9 -9
- package/src/migrations/db/migrations/1688556755264-initial-sync-rules.ts +1 -1
- package/src/migrations/db/migrations/1702295701188-sync-rule-state.ts +7 -7
- package/src/migrations/db/migrations/1770213298299-storage-version.ts +1 -1
- package/src/storage/MongoBucketStorage.ts +97 -62
- package/src/storage/MongoReportStorage.ts +2 -2
- package/src/storage/implementation/BucketDefinitionMapping.ts +72 -0
- package/src/storage/implementation/MongoBucketBatch.ts +110 -144
- package/src/storage/implementation/MongoBucketBatchShared.ts +11 -0
- package/src/storage/implementation/MongoChecksums.ts +53 -76
- package/src/storage/implementation/MongoCompactor.ts +374 -404
- package/src/storage/implementation/MongoParameterCompactor.ts +37 -24
- package/src/storage/implementation/MongoPersistedSyncRules.ts +76 -0
- package/src/storage/implementation/MongoPersistedSyncRulesContent.ts +18 -1
- package/src/storage/implementation/MongoStorageProvider.ts +1 -1
- package/src/storage/implementation/MongoSyncBucketStorage.ts +190 -457
- package/src/storage/implementation/MongoSyncRulesLock.ts +12 -14
- package/src/storage/implementation/MongoWriteCheckpointAPI.ts +4 -2
- package/src/storage/implementation/OperationBatch.ts +1 -1
- package/src/storage/implementation/common/BucketDataDoc.ts +37 -0
- package/src/storage/implementation/common/MongoSyncBucketStorageContext.ts +15 -0
- package/src/storage/implementation/common/PersistedBatch.ts +364 -0
- package/src/storage/implementation/common/SingleBucketStore.ts +63 -0
- package/src/storage/implementation/common/SourceRecordStore.ts +49 -0
- package/src/storage/implementation/common/VersionedPowerSyncMongoBase.ts +80 -0
- package/src/storage/implementation/createMongoSyncBucketStorage.ts +25 -0
- package/src/storage/implementation/db.ts +107 -128
- package/src/storage/implementation/models.ts +84 -38
- package/src/storage/implementation/v1/MongoBucketBatchV1.ts +32 -0
- package/src/storage/implementation/v1/MongoChecksumsV1.ts +75 -0
- package/src/storage/implementation/v1/MongoCompactorV1.ts +93 -0
- package/src/storage/implementation/v1/MongoParameterCompactorV1.ts +26 -0
- package/src/storage/implementation/v1/MongoSyncBucketStorageV1.ts +448 -0
- package/src/storage/implementation/v1/PersistedBatchV1.ts +230 -0
- package/src/storage/implementation/v1/SingleBucketStoreV1.ts +74 -0
- package/src/storage/implementation/v1/SourceRecordStoreV1.ts +156 -0
- package/src/storage/implementation/v1/VersionedPowerSyncMongoV1.ts +28 -0
- package/src/storage/implementation/v1/models.ts +84 -0
- package/src/storage/implementation/v3/MongoBucketBatchV3.ts +44 -0
- package/src/storage/implementation/v3/MongoChecksumsV3.ts +120 -0
- package/src/storage/implementation/v3/MongoCompactorV3.ts +107 -0
- package/src/storage/implementation/v3/MongoParameterCompactorV3.ts +24 -0
- package/src/storage/implementation/v3/MongoParameterLookupV3.ts +12 -0
- package/src/storage/implementation/v3/MongoSyncBucketStorageV3.ts +550 -0
- package/src/storage/implementation/v3/PersistedBatchV3.ts +318 -0
- package/src/storage/implementation/v3/SingleBucketStoreV3.ts +68 -0
- package/src/storage/implementation/v3/SourceRecordStoreV3.ts +226 -0
- package/src/storage/implementation/v3/VersionedPowerSyncMongoV3.ts +112 -0
- package/src/storage/implementation/v3/models.ts +96 -0
- package/src/storage/storage-index.ts +8 -5
- package/src/utils/util.ts +36 -7
- package/test/src/__snapshots__/storage_sync.test.ts.snap +282 -0
- package/test/src/connection-report-storage.test.ts +3 -3
- package/test/src/setup.ts +1 -1
- package/test/src/storage.test.ts +2 -2
- package/test/src/storage_compacting.test.ts +57 -29
- package/test/src/storage_sync.test.ts +351 -5
- package/test/tsconfig.json +0 -1
- package/tsconfig.tsbuildinfo +1 -1
- package/dist/storage/implementation/PersistedBatch.d.ts +0 -71
- package/dist/storage/implementation/PersistedBatch.js +0 -354
- package/dist/storage/implementation/PersistedBatch.js.map +0 -1
- package/src/storage/implementation/PersistedBatch.ts +0 -432
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { isMongoServerError,
|
|
2
|
-
import { logger, ReplicationAssertionError, ServiceAssertionError } from '@powersync/lib-services-framework';
|
|
1
|
+
import { isMongoServerError, MONGO_OPERATION_TIMEOUT_MS } from '@powersync/lib-service-mongodb';
|
|
2
|
+
import { logger as defaultLogger, ReplicationAssertionError, ServiceAssertionError } from '@powersync/lib-services-framework';
|
|
3
3
|
import { addChecksums, isPartialChecksum, utils } from '@powersync/service-core';
|
|
4
4
|
import { cacheKey } from './OperationBatch.js';
|
|
5
5
|
const DEFAULT_CLEAR_BATCH_LIMIT = 5000;
|
|
@@ -25,6 +25,7 @@ export class MongoCompactor {
|
|
|
25
25
|
buckets;
|
|
26
26
|
signal;
|
|
27
27
|
group_id;
|
|
28
|
+
logger;
|
|
28
29
|
constructor(storage, db, options) {
|
|
29
30
|
this.storage = storage;
|
|
30
31
|
this.db = db;
|
|
@@ -38,6 +39,7 @@ export class MongoCompactor {
|
|
|
38
39
|
this.maxOpId = options.maxOpId ?? 0n;
|
|
39
40
|
this.buckets = options.compactBuckets;
|
|
40
41
|
this.signal = options.signal;
|
|
42
|
+
this.logger = options.logger ?? defaultLogger;
|
|
41
43
|
}
|
|
42
44
|
/**
|
|
43
45
|
* Compact buckets by converting operations into MOVE and/or CLEAR operations.
|
|
@@ -46,9 +48,8 @@ export class MongoCompactor {
|
|
|
46
48
|
*/
|
|
47
49
|
async compact() {
|
|
48
50
|
if (this.buckets) {
|
|
49
|
-
for (
|
|
50
|
-
// We can make this more efficient later on by iterating
|
|
51
|
-
// through the buckets in a single query.
|
|
51
|
+
for (const bucket of this.buckets) {
|
|
52
|
+
// We can make this more efficient later on by iterating through the buckets in a single query.
|
|
52
53
|
// That makes batching more tricky, so we leave for later.
|
|
53
54
|
await this.compactSingleBucketRetried(bucket);
|
|
54
55
|
}
|
|
@@ -57,8 +58,123 @@ export class MongoCompactor {
|
|
|
57
58
|
await this.compactDirtyBuckets();
|
|
58
59
|
}
|
|
59
60
|
}
|
|
61
|
+
/**
|
|
62
|
+
* Subset of compact, only populating checksums where relevant.
|
|
63
|
+
*/
|
|
64
|
+
async populateChecksums(options) {
|
|
65
|
+
let count = 0;
|
|
66
|
+
while (true) {
|
|
67
|
+
this.signal?.throwIfAborted();
|
|
68
|
+
const buckets = await this.dirtyBucketBatchForChecksums(options);
|
|
69
|
+
if (buckets.length == 0) {
|
|
70
|
+
break;
|
|
71
|
+
}
|
|
72
|
+
this.signal?.throwIfAborted();
|
|
73
|
+
const start = Date.now();
|
|
74
|
+
// Filter batch by estimated bucket size, to reduce possibility of timeouts.
|
|
75
|
+
const checkBuckets = [];
|
|
76
|
+
let totalCountEstimate = 0;
|
|
77
|
+
for (const bucket of buckets) {
|
|
78
|
+
checkBuckets.push(bucket);
|
|
79
|
+
totalCountEstimate += bucket.estimatedCount;
|
|
80
|
+
if (totalCountEstimate > 50_000) {
|
|
81
|
+
break;
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
this.logger.info(`Calculating checksums for batch of ${buckets.length} buckets, estimated count of ${totalCountEstimate}`);
|
|
85
|
+
await this.updateChecksumsBatch(checkBuckets);
|
|
86
|
+
this.logger.info(`Updated checksums for batch of ${checkBuckets.length} buckets in ${Date.now() - start}ms`);
|
|
87
|
+
count += checkBuckets.length;
|
|
88
|
+
}
|
|
89
|
+
return { buckets: count };
|
|
90
|
+
}
|
|
91
|
+
async *dirtyBucketBatchesForCollection(collection, lastId, maxId, options, getDefinitionId) {
|
|
92
|
+
while (true) {
|
|
93
|
+
// To avoid timeouts from too many buckets not meeting the minBucketChanges criteria, use an aggregation pipeline
|
|
94
|
+
// to scan a fixed batch of buckets at a time, but only return buckets that meet the criteria.
|
|
95
|
+
const [result] = await collection
|
|
96
|
+
.aggregate([
|
|
97
|
+
{
|
|
98
|
+
$match: {
|
|
99
|
+
_id: { $gt: lastId, $lt: maxId }
|
|
100
|
+
}
|
|
101
|
+
},
|
|
102
|
+
{
|
|
103
|
+
$sort: { _id: 1 }
|
|
104
|
+
},
|
|
105
|
+
{
|
|
106
|
+
// Scan a fixed number of docs each query so sparse matches don't block progress.
|
|
107
|
+
$limit: DIRTY_BUCKET_SCAN_BATCH_SIZE
|
|
108
|
+
},
|
|
109
|
+
{
|
|
110
|
+
$facet: {
|
|
111
|
+
buckets: [
|
|
112
|
+
{
|
|
113
|
+
$match: {
|
|
114
|
+
'estimate_since_compact.count': { $gte: options.minBucketChanges }
|
|
115
|
+
}
|
|
116
|
+
},
|
|
117
|
+
{
|
|
118
|
+
$project: {
|
|
119
|
+
_id: 1,
|
|
120
|
+
estimate_since_compact: 1,
|
|
121
|
+
compacted_state: 1
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
],
|
|
125
|
+
// This is used for the next query.
|
|
126
|
+
cursor: [{ $sort: { _id: -1 } }, { $limit: 1 }, { $project: { _id: 1 } }]
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
], { maxTimeMS: MONGO_OPERATION_TIMEOUT_MS })
|
|
130
|
+
.toArray();
|
|
131
|
+
const cursor = result?.cursor?.[0];
|
|
132
|
+
if (cursor == null) {
|
|
133
|
+
break;
|
|
134
|
+
}
|
|
135
|
+
lastId = cursor._id;
|
|
136
|
+
const mapped = (result?.buckets ?? []).map((bucketState) => {
|
|
137
|
+
// The numbers, specifically the bytes, could be a bigint. Convert to Number to allow calculating ratios.
|
|
138
|
+
// BigInt precision is not needed here since this is only an estimate.
|
|
139
|
+
const updatedCount = bucketState.estimate_since_compact?.count ?? 0;
|
|
140
|
+
const totalCount = (bucketState.compacted_state?.count ?? 0) + updatedCount;
|
|
141
|
+
const updatedBytes = Number(bucketState.estimate_since_compact?.bytes ?? 0);
|
|
142
|
+
const totalBytes = Number(bucketState.compacted_state?.bytes ?? 0) + updatedBytes;
|
|
143
|
+
const dirtyChangeNumber = totalCount > 0 ? updatedCount / totalCount : 0;
|
|
144
|
+
const dirtyChangeBytes = totalBytes > 0 ? updatedBytes / totalBytes : 0;
|
|
145
|
+
return {
|
|
146
|
+
bucket: bucketState._id.b,
|
|
147
|
+
definitionId: getDefinitionId(bucketState),
|
|
148
|
+
estimatedCount: totalCount,
|
|
149
|
+
dirtyRatio: Math.max(dirtyChangeNumber, dirtyChangeBytes)
|
|
150
|
+
};
|
|
151
|
+
});
|
|
152
|
+
yield mapped.filter((bucket) => bucket.estimatedCount >= options.minBucketChanges && bucket.dirtyRatio >= options.minChangeRatio);
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
async dirtyBucketBatchForChecksumsForCollection(collection, filter, getDefinitionId) {
|
|
156
|
+
const dirtyBuckets = await collection
|
|
157
|
+
.find(filter, {
|
|
158
|
+
projection: {
|
|
159
|
+
_id: 1,
|
|
160
|
+
estimate_since_compact: 1,
|
|
161
|
+
compacted_state: 1
|
|
162
|
+
},
|
|
163
|
+
sort: {
|
|
164
|
+
'estimate_since_compact.count': -1
|
|
165
|
+
},
|
|
166
|
+
limit: 200,
|
|
167
|
+
maxTimeMS: MONGO_OPERATION_TIMEOUT_MS
|
|
168
|
+
})
|
|
169
|
+
.toArray();
|
|
170
|
+
return dirtyBuckets.map((bucket) => ({
|
|
171
|
+
bucket: bucket._id.b,
|
|
172
|
+
definitionId: getDefinitionId(bucket),
|
|
173
|
+
estimatedCount: Number(bucket.estimate_since_compact.count) + Number(bucket.compacted_state?.count ?? 0)
|
|
174
|
+
}));
|
|
175
|
+
}
|
|
60
176
|
async compactDirtyBuckets() {
|
|
61
|
-
for await (
|
|
177
|
+
for await (const buckets of this.dirtyBucketBatches({
|
|
62
178
|
minBucketChanges: this.minBucketChanges,
|
|
63
179
|
minChangeRatio: this.minChangeRatio
|
|
64
180
|
})) {
|
|
@@ -66,8 +182,8 @@ export class MongoCompactor {
|
|
|
66
182
|
if (buckets.length == 0) {
|
|
67
183
|
continue;
|
|
68
184
|
}
|
|
69
|
-
for (
|
|
70
|
-
await this.compactSingleBucketRetried(bucket);
|
|
185
|
+
for (const { bucket, definitionId } of buckets) {
|
|
186
|
+
await this.compactSingleBucketRetried(bucket, definitionId);
|
|
71
187
|
}
|
|
72
188
|
}
|
|
73
189
|
}
|
|
@@ -76,16 +192,16 @@ export class MongoCompactor {
|
|
|
76
192
|
*
|
|
77
193
|
* This covers against occasional network or other database errors during a long compact job.
|
|
78
194
|
*/
|
|
79
|
-
async compactSingleBucketRetried(bucket) {
|
|
195
|
+
async compactSingleBucketRetried(bucket, definitionId = null) {
|
|
80
196
|
let retryCount = 0;
|
|
81
197
|
while (true) {
|
|
82
198
|
try {
|
|
83
|
-
await this.compactSingleBucket(bucket);
|
|
199
|
+
await this.compactSingleBucket(bucket, definitionId);
|
|
84
200
|
break;
|
|
85
201
|
}
|
|
86
202
|
catch (e) {
|
|
87
203
|
if (retryCount < 3 && isMongoServerError(e)) {
|
|
88
|
-
logger.warn(`Error compacting bucket ${bucket}, retrying...`, e);
|
|
204
|
+
this.logger.warn(`Error compacting bucket ${bucket}, retrying...`, e);
|
|
89
205
|
retryCount++;
|
|
90
206
|
await new Promise((resolve) => setTimeout(resolve, 1000 * retryCount));
|
|
91
207
|
}
|
|
@@ -95,10 +211,15 @@ export class MongoCompactor {
|
|
|
95
211
|
}
|
|
96
212
|
}
|
|
97
213
|
}
|
|
98
|
-
async compactSingleBucket(bucket) {
|
|
214
|
+
async compactSingleBucket(bucket, definitionId = null) {
|
|
99
215
|
const idLimitBytes = this.idLimitBytes;
|
|
100
|
-
|
|
216
|
+
const bucketContext = await this.getBucketDataContext(bucket, definitionId);
|
|
217
|
+
if (bucketContext == null) {
|
|
218
|
+
return;
|
|
219
|
+
}
|
|
220
|
+
const currentState = {
|
|
101
221
|
bucket,
|
|
222
|
+
definitionId: bucketContext.key.definitionId,
|
|
102
223
|
seen: new Map(),
|
|
103
224
|
trackingSize: 0,
|
|
104
225
|
lastNotPut: null,
|
|
@@ -107,28 +228,24 @@ export class MongoCompactor {
|
|
|
107
228
|
opCount: 0,
|
|
108
229
|
opBytes: 0
|
|
109
230
|
};
|
|
110
|
-
// Constant lower bound
|
|
111
|
-
const lowerBound =
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
o: new mongo.MinKey()
|
|
115
|
-
};
|
|
116
|
-
// Upper bound is adjusted for each batch
|
|
117
|
-
let upperBound = {
|
|
118
|
-
g: this.group_id,
|
|
119
|
-
b: bucket,
|
|
120
|
-
o: new mongo.MaxKey()
|
|
121
|
-
};
|
|
231
|
+
// Constant lower bound.
|
|
232
|
+
const lowerBound = bucketContext.minId;
|
|
233
|
+
// Upper bound is adjusted for each batch.
|
|
234
|
+
let upperBound = bucketContext.maxId;
|
|
122
235
|
while (true) {
|
|
123
236
|
this.signal?.throwIfAborted();
|
|
124
|
-
// Query one batch at a time, to avoid cursor timeouts
|
|
125
|
-
const
|
|
237
|
+
// Query one batch at a time, to avoid cursor timeouts.
|
|
238
|
+
const pipeline = [
|
|
126
239
|
{
|
|
127
240
|
$match: {
|
|
128
241
|
_id: {
|
|
129
242
|
$gte: lowerBound,
|
|
130
243
|
$lt: upperBound
|
|
131
|
-
}
|
|
244
|
+
},
|
|
245
|
+
// Workaround for a clustered collection bug where the $lt operator may include upperBound.
|
|
246
|
+
// Technically only needed for storage V3.
|
|
247
|
+
// https://jira.mongodb.org/browse/SERVER-121822
|
|
248
|
+
'_id.o': { $lt: upperBound.o }
|
|
132
249
|
}
|
|
133
250
|
},
|
|
134
251
|
{ $sort: { _id: -1 } },
|
|
@@ -145,39 +262,45 @@ export class MongoCompactor {
|
|
|
145
262
|
size: { $bsonSize: '$$ROOT' }
|
|
146
263
|
}
|
|
147
264
|
}
|
|
148
|
-
]
|
|
265
|
+
];
|
|
266
|
+
const cursor = bucketContext.collection.aggregate(pipeline, {
|
|
149
267
|
// batchSize is 1 more than limit to auto-close the cursor.
|
|
150
268
|
// See https://github.com/mongodb/node-mongodb-native/pull/4580
|
|
151
269
|
batchSize: this.moveBatchQueryLimit + 1
|
|
152
270
|
});
|
|
153
271
|
// We don't limit to a single batch here, since that often causes MongoDB to scan through more than it returns.
|
|
154
272
|
// Instead, we load up to the limit.
|
|
155
|
-
const
|
|
273
|
+
const rawBatch = await cursor.toArray();
|
|
274
|
+
const batch = rawBatch.map((document) => {
|
|
275
|
+
const { size, ...rest } = document;
|
|
276
|
+
return {
|
|
277
|
+
doc: bucketContext.fromPersistedDocument(rest),
|
|
278
|
+
size
|
|
279
|
+
};
|
|
280
|
+
});
|
|
156
281
|
if (batch.length == 0) {
|
|
157
|
-
// We've reached the end
|
|
282
|
+
// We've reached the end.
|
|
158
283
|
break;
|
|
159
284
|
}
|
|
160
|
-
//
|
|
161
|
-
upperBound =
|
|
162
|
-
for (
|
|
163
|
-
if (doc.
|
|
285
|
+
// Reuse the exact collection _id value from Mongo for the next bound.
|
|
286
|
+
upperBound = rawBatch[rawBatch.length - 1]._id;
|
|
287
|
+
for (const { doc, size } of batch) {
|
|
288
|
+
if (doc.o > this.maxOpId) {
|
|
164
289
|
continue;
|
|
165
290
|
}
|
|
166
291
|
currentState.checksum = addChecksums(currentState.checksum, Number(doc.checksum));
|
|
167
292
|
currentState.opCount += 1;
|
|
168
293
|
let isPersistentPut = doc.op == 'PUT';
|
|
169
|
-
currentState.opBytes += Number(
|
|
294
|
+
currentState.opBytes += Number(size);
|
|
170
295
|
if (doc.op == 'REMOVE' || doc.op == 'PUT') {
|
|
171
296
|
const key = `${doc.table}/${doc.row_id}/${cacheKey(doc.source_table, doc.source_key)}`;
|
|
172
297
|
const targetOp = currentState.seen.get(key);
|
|
173
298
|
if (targetOp) {
|
|
174
|
-
// Will convert to MOVE, so don't count as PUT
|
|
299
|
+
// Will convert to MOVE, so don't count as PUT.
|
|
175
300
|
isPersistentPut = false;
|
|
176
301
|
this.updates.push({
|
|
177
302
|
updateOne: {
|
|
178
|
-
filter: {
|
|
179
|
-
_id: doc._id
|
|
180
|
-
},
|
|
303
|
+
filter: { _id: bucketContext.docId(doc.o) },
|
|
181
304
|
update: {
|
|
182
305
|
$set: {
|
|
183
306
|
op: 'MOVE',
|
|
@@ -193,22 +316,17 @@ export class MongoCompactor {
|
|
|
193
316
|
}
|
|
194
317
|
}
|
|
195
318
|
});
|
|
196
|
-
|
|
319
|
+
// TODO: better estimate for this.
|
|
320
|
+
currentState.opBytes += 200 - Number(size);
|
|
197
321
|
}
|
|
198
|
-
else {
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
// length + 16 for the string
|
|
207
|
-
// 24 for the bigint
|
|
208
|
-
// 50 for map overhead
|
|
209
|
-
// 50 for additional overhead
|
|
210
|
-
currentState.trackingSize += key.length + 140;
|
|
211
|
-
}
|
|
322
|
+
else if (currentState.trackingSize < idLimitBytes) {
|
|
323
|
+
// flatstr reduces the memory usage by flattening the string.
|
|
324
|
+
currentState.seen.set(utils.flatstr(key), doc.o);
|
|
325
|
+
// length + 16 for the string
|
|
326
|
+
// 24 for the bigint
|
|
327
|
+
// 50 for map overhead
|
|
328
|
+
// 50 for additional overhead
|
|
329
|
+
currentState.trackingSize += key.length + 140;
|
|
212
330
|
}
|
|
213
331
|
}
|
|
214
332
|
if (isPersistentPut) {
|
|
@@ -217,44 +335,36 @@ export class MongoCompactor {
|
|
|
217
335
|
}
|
|
218
336
|
else if (doc.op != 'CLEAR') {
|
|
219
337
|
if (currentState.lastNotPut == null) {
|
|
220
|
-
currentState.lastNotPut = doc.
|
|
338
|
+
currentState.lastNotPut = doc.o;
|
|
221
339
|
}
|
|
222
340
|
currentState.opsSincePut += 1;
|
|
223
341
|
}
|
|
224
342
|
if (this.updates.length + this.bucketStateUpdates.length >= this.moveBatchLimit) {
|
|
225
|
-
await this.flush();
|
|
343
|
+
await this.flush(bucketContext);
|
|
226
344
|
}
|
|
227
345
|
}
|
|
228
|
-
logger.info(`Processed batch of length ${batch.length} current bucket: ${bucket}`);
|
|
346
|
+
this.logger.info(`Processed batch of length ${batch.length} current bucket: ${bucket}`);
|
|
229
347
|
}
|
|
230
|
-
// Free memory before clearing bucket
|
|
348
|
+
// Free memory before clearing the bucket.
|
|
231
349
|
currentState.seen.clear();
|
|
232
350
|
if (currentState.lastNotPut != null && currentState.opsSincePut >= 1) {
|
|
233
|
-
logger.info(`Inserting CLEAR at ${this.group_id}:${bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`);
|
|
234
|
-
// Need flush() before clear()
|
|
235
|
-
await this.flush();
|
|
236
|
-
await this.clearBucket(currentState);
|
|
351
|
+
this.logger.info(`Inserting CLEAR at ${this.group_id}:${bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`);
|
|
352
|
+
// Need flush() before clear().
|
|
353
|
+
await this.flush(bucketContext);
|
|
354
|
+
await this.clearBucket(currentState, bucketContext);
|
|
237
355
|
}
|
|
238
|
-
// Do this
|
|
356
|
+
// Do this after clearBucket so we have accurate counts.
|
|
239
357
|
this.updateBucketChecksums(currentState);
|
|
240
|
-
// Need another flush after updateBucketChecksums()
|
|
241
|
-
await this.flush();
|
|
358
|
+
// Need another flush after updateBucketChecksums().
|
|
359
|
+
await this.flush(bucketContext);
|
|
242
360
|
}
|
|
243
|
-
/**
|
|
244
|
-
* Call when done with a bucket.
|
|
245
|
-
*/
|
|
246
361
|
updateBucketChecksums(state) {
|
|
247
362
|
if (state.opCount < 0) {
|
|
248
363
|
throw new ServiceAssertionError(`Invalid opCount: ${state.opCount} checksum ${state.checksum} opsSincePut: ${state.opsSincePut} maxOpId: ${this.maxOpId}`);
|
|
249
364
|
}
|
|
250
365
|
this.bucketStateUpdates.push({
|
|
251
366
|
updateOne: {
|
|
252
|
-
filter:
|
|
253
|
-
_id: {
|
|
254
|
-
g: this.group_id,
|
|
255
|
-
b: state.bucket
|
|
256
|
-
}
|
|
257
|
-
},
|
|
367
|
+
filter: this.bucketStateFilter(state.bucket, state.definitionId),
|
|
258
368
|
update: {
|
|
259
369
|
$set: {
|
|
260
370
|
compacted_state: {
|
|
@@ -264,9 +374,8 @@ export class MongoCompactor {
|
|
|
264
374
|
bytes: state.opBytes
|
|
265
375
|
},
|
|
266
376
|
estimate_since_compact: {
|
|
267
|
-
//
|
|
268
|
-
// which we don't currently cater for.
|
|
269
|
-
// We could potentially query for that, but that could add overhead.
|
|
377
|
+
// There could have been a whole bunch of new operations added to the bucket while compacting,
|
|
378
|
+
// which we don't currently cater for. We could potentially query for that, but that adds overhead.
|
|
270
379
|
count: 0,
|
|
271
380
|
bytes: 0
|
|
272
381
|
}
|
|
@@ -278,48 +387,36 @@ export class MongoCompactor {
|
|
|
278
387
|
}
|
|
279
388
|
});
|
|
280
389
|
}
|
|
281
|
-
async flush() {
|
|
390
|
+
async flush(col) {
|
|
282
391
|
if (this.updates.length > 0) {
|
|
283
|
-
logger.info(`Compacting ${this.updates.length} ops`);
|
|
284
|
-
await
|
|
285
|
-
// Order is not important.
|
|
286
|
-
//
|
|
287
|
-
// and it's fine if the operations are partially applied.
|
|
288
|
-
// Each individual operation is atomic.
|
|
392
|
+
this.logger.info(`Compacting ${this.updates.length} ops`);
|
|
393
|
+
await col.collection.bulkWrite(this.updates, {
|
|
394
|
+
// Order is not important. Since checksums are not affected, these operations can happen in any order,
|
|
395
|
+
// and it's fine if the operations are partially applied. Each individual operation is atomic.
|
|
289
396
|
ordered: false
|
|
290
397
|
});
|
|
291
398
|
this.updates = [];
|
|
292
399
|
}
|
|
400
|
+
await this.flushBucketStateUpdates();
|
|
401
|
+
}
|
|
402
|
+
async flushBucketStateUpdates() {
|
|
293
403
|
if (this.bucketStateUpdates.length > 0) {
|
|
294
|
-
logger.info(`Updating ${this.bucketStateUpdates.length} bucket states`);
|
|
295
|
-
await this.
|
|
296
|
-
ordered: false
|
|
297
|
-
});
|
|
404
|
+
this.logger.info(`Updating ${this.bucketStateUpdates.length} bucket states`);
|
|
405
|
+
await this.writeBucketStateUpdates();
|
|
298
406
|
this.bucketStateUpdates = [];
|
|
299
407
|
}
|
|
300
408
|
}
|
|
301
409
|
/**
|
|
302
410
|
* Perform a CLEAR compact for a bucket.
|
|
303
411
|
*
|
|
304
|
-
*
|
|
305
|
-
* @param bucket bucket name
|
|
306
|
-
* @param op op_id of the last non-PUT operation, which will be converted to CLEAR.
|
|
412
|
+
* @param currentState tracks the last non-PUT op, which will be converted to CLEAR.
|
|
307
413
|
*/
|
|
308
|
-
async clearBucket(currentState) {
|
|
309
|
-
const bucket = currentState.bucket;
|
|
414
|
+
async clearBucket(currentState, col) {
|
|
310
415
|
const clearOp = currentState.lastNotPut;
|
|
311
416
|
const opFilter = {
|
|
312
417
|
_id: {
|
|
313
|
-
$gte:
|
|
314
|
-
|
|
315
|
-
b: bucket,
|
|
316
|
-
o: new mongo.MinKey()
|
|
317
|
-
},
|
|
318
|
-
$lte: {
|
|
319
|
-
g: this.group_id,
|
|
320
|
-
b: bucket,
|
|
321
|
-
o: clearOp
|
|
322
|
-
}
|
|
418
|
+
$gte: col.minId,
|
|
419
|
+
$lte: col.docId(clearOp)
|
|
323
420
|
}
|
|
324
421
|
};
|
|
325
422
|
const session = this.db.client.startSession();
|
|
@@ -332,7 +429,7 @@ export class MongoCompactor {
|
|
|
332
429
|
// The state after each batch is fully consistent.
|
|
333
430
|
// We need a transaction per batch to make sure checksums stay consistent.
|
|
334
431
|
await session.withTransaction(async () => {
|
|
335
|
-
const query =
|
|
432
|
+
const query = col.collection.find(opFilter, {
|
|
336
433
|
session,
|
|
337
434
|
sort: { _id: 1 },
|
|
338
435
|
projection: {
|
|
@@ -344,56 +441,52 @@ export class MongoCompactor {
|
|
|
344
441
|
limit: this.clearBatchLimit
|
|
345
442
|
});
|
|
346
443
|
let checksum = 0;
|
|
347
|
-
let
|
|
444
|
+
let lastOp = null;
|
|
348
445
|
let targetOp = null;
|
|
349
446
|
let gotAnOp = false;
|
|
350
447
|
let numberOfOpsToClear = 0;
|
|
351
|
-
for await (
|
|
448
|
+
for await (const rawOp of query.stream()) {
|
|
449
|
+
const op = col.fromPartialPersistedDocument(rawOp);
|
|
352
450
|
if (op.op == 'MOVE' || op.op == 'REMOVE' || op.op == 'CLEAR') {
|
|
353
451
|
checksum = utils.addChecksums(checksum, Number(op.checksum));
|
|
354
|
-
|
|
452
|
+
lastOp = op;
|
|
355
453
|
numberOfOpsToClear += 1;
|
|
356
454
|
if (op.op != 'CLEAR') {
|
|
357
455
|
gotAnOp = true;
|
|
358
456
|
}
|
|
359
|
-
if (op.target_op != null) {
|
|
360
|
-
|
|
361
|
-
targetOp = op.target_op;
|
|
362
|
-
}
|
|
457
|
+
if (op.target_op != null && (targetOp == null || op.target_op > targetOp)) {
|
|
458
|
+
targetOp = op.target_op;
|
|
363
459
|
}
|
|
364
460
|
}
|
|
365
461
|
else {
|
|
366
|
-
throw new ReplicationAssertionError(`Unexpected ${op.op} operation at ${
|
|
462
|
+
throw new ReplicationAssertionError(`Unexpected ${op.op} operation at ${this.formatBucketDataKey(op)}`);
|
|
367
463
|
}
|
|
368
464
|
}
|
|
369
465
|
if (!gotAnOp) {
|
|
370
466
|
done = true;
|
|
371
467
|
return;
|
|
372
468
|
}
|
|
373
|
-
logger.info(`Flushing CLEAR for ${numberOfOpsToClear} ops at ${
|
|
374
|
-
await
|
|
469
|
+
this.logger.info(`Flushing CLEAR for ${numberOfOpsToClear} ops at ${lastOp?.o}`);
|
|
470
|
+
await col.collection.deleteMany({
|
|
375
471
|
_id: {
|
|
376
|
-
$gte:
|
|
377
|
-
|
|
378
|
-
b: bucket,
|
|
379
|
-
o: new mongo.MinKey()
|
|
380
|
-
},
|
|
381
|
-
$lte: lastOpId
|
|
472
|
+
$gte: col.minId,
|
|
473
|
+
$lte: col.docId(lastOp.o)
|
|
382
474
|
}
|
|
383
475
|
}, { session });
|
|
384
|
-
|
|
385
|
-
|
|
476
|
+
const op = col.toPersistedDocument({
|
|
477
|
+
o: lastOp.o,
|
|
386
478
|
op: 'CLEAR',
|
|
387
479
|
checksum: BigInt(checksum),
|
|
388
480
|
data: null,
|
|
389
481
|
target_op: targetOp
|
|
390
|
-
}
|
|
482
|
+
});
|
|
483
|
+
await col.collection.insertOne(op, { session });
|
|
391
484
|
opCountDiff = -numberOfOpsToClear + 1;
|
|
392
485
|
}, {
|
|
393
486
|
writeConcern: { w: 'majority' },
|
|
394
487
|
readConcern: { level: 'snapshot' }
|
|
395
488
|
});
|
|
396
|
-
// Update
|
|
489
|
+
// Update outside the transaction, since the transaction can be retried multiple times.
|
|
397
490
|
currentState.opCount += opCountDiff;
|
|
398
491
|
}
|
|
399
492
|
}
|
|
@@ -401,180 +494,17 @@ export class MongoCompactor {
|
|
|
401
494
|
await session.endSession();
|
|
402
495
|
}
|
|
403
496
|
}
|
|
404
|
-
/**
|
|
405
|
-
* Subset of compact, only populating checksums where relevant.
|
|
406
|
-
*/
|
|
407
|
-
async populateChecksums(options) {
|
|
408
|
-
let count = 0;
|
|
409
|
-
while (true) {
|
|
410
|
-
this.signal?.throwIfAborted();
|
|
411
|
-
const buckets = await this.dirtyBucketBatchForChecksums(options);
|
|
412
|
-
if (buckets.length == 0) {
|
|
413
|
-
// All done
|
|
414
|
-
break;
|
|
415
|
-
}
|
|
416
|
-
this.signal?.throwIfAborted();
|
|
417
|
-
const start = Date.now();
|
|
418
|
-
// Filter batch by estimated bucket size, to reduce possibility of timeouts
|
|
419
|
-
let checkBuckets = [];
|
|
420
|
-
let totalCountEstimate = 0;
|
|
421
|
-
for (let bucket of buckets) {
|
|
422
|
-
checkBuckets.push(bucket);
|
|
423
|
-
totalCountEstimate += bucket.estimatedCount;
|
|
424
|
-
if (totalCountEstimate > 50_000) {
|
|
425
|
-
break;
|
|
426
|
-
}
|
|
427
|
-
}
|
|
428
|
-
logger.info(`Calculating checksums for batch of ${buckets.length} buckets, estimated count of ${totalCountEstimate}`);
|
|
429
|
-
await this.updateChecksumsBatch(checkBuckets.map((b) => b.bucket));
|
|
430
|
-
logger.info(`Updated checksums for batch of ${checkBuckets.length} buckets in ${Date.now() - start}ms`);
|
|
431
|
-
count += checkBuckets.length;
|
|
432
|
-
}
|
|
433
|
-
return { buckets: count };
|
|
434
|
-
}
|
|
435
|
-
/**
|
|
436
|
-
* Return batches of dirty buckets.
|
|
437
|
-
*
|
|
438
|
-
* Can be used to iterate through all buckets.
|
|
439
|
-
*
|
|
440
|
-
* minBucketChanges: minimum number of changes for a bucket to be included in the results.
|
|
441
|
-
* minChangeRatio: minimum ratio of changes to total ops for a bucket to be included in the results, number between 0 and 1.
|
|
442
|
-
*/
|
|
443
|
-
async *dirtyBucketBatches(options) {
|
|
444
|
-
// Previously, we used an index on {_id.g: 1, estimate_since_compact.count: 1} to only buckets with changes.
|
|
445
|
-
// This works well if there are only a small number of buckets with changes.
|
|
446
|
-
// However, if buckets are continuosly modified while we are compacting, we get the same buckets over and over again.
|
|
447
|
-
// This has caused the compact process to re-read the same collection around 5x times in total, which is very inefficient.
|
|
448
|
-
// To solve this, we now just iterate through all buckets, and filter out the ones with low changes.
|
|
449
|
-
if (options.minBucketChanges <= 0) {
|
|
450
|
-
throw new ReplicationAssertionError('minBucketChanges must be >= 1');
|
|
451
|
-
}
|
|
452
|
-
let lastId = { g: this.group_id, b: new mongo.MinKey() };
|
|
453
|
-
const maxId = { g: this.group_id, b: new mongo.MaxKey() };
|
|
454
|
-
while (true) {
|
|
455
|
-
// To avoid timeouts from too many buckets not meeting the minBucketChanges criteria, we use an aggregation pipeline
|
|
456
|
-
// to scan a fixed batch of buckets at a time, but only return buckets that meet the criteria, rather than limiting
|
|
457
|
-
// on the output number.
|
|
458
|
-
const [result] = await this.db.bucket_state
|
|
459
|
-
.aggregate([
|
|
460
|
-
{
|
|
461
|
-
$match: {
|
|
462
|
-
_id: { $gt: lastId, $lt: maxId }
|
|
463
|
-
}
|
|
464
|
-
},
|
|
465
|
-
{
|
|
466
|
-
$sort: { _id: 1 }
|
|
467
|
-
},
|
|
468
|
-
{
|
|
469
|
-
// Scan a fixed number of docs each query so sparse matches don't block progress.
|
|
470
|
-
$limit: DIRTY_BUCKET_SCAN_BATCH_SIZE
|
|
471
|
-
},
|
|
472
|
-
{
|
|
473
|
-
$facet: {
|
|
474
|
-
// This is the results for the batch
|
|
475
|
-
buckets: [
|
|
476
|
-
{
|
|
477
|
-
$match: {
|
|
478
|
-
'estimate_since_compact.count': { $gte: options.minBucketChanges }
|
|
479
|
-
}
|
|
480
|
-
},
|
|
481
|
-
{
|
|
482
|
-
$project: {
|
|
483
|
-
_id: 1,
|
|
484
|
-
estimate_since_compact: 1,
|
|
485
|
-
compacted_state: 1
|
|
486
|
-
}
|
|
487
|
-
}
|
|
488
|
-
],
|
|
489
|
-
// This is used for the next query.
|
|
490
|
-
cursor: [{ $sort: { _id: -1 } }, { $limit: 1 }, { $project: { _id: 1 } }]
|
|
491
|
-
}
|
|
492
|
-
}
|
|
493
|
-
], { maxTimeMS: MONGO_OPERATION_TIMEOUT_MS })
|
|
494
|
-
.toArray();
|
|
495
|
-
const cursor = result?.cursor?.[0];
|
|
496
|
-
if (cursor == null) {
|
|
497
|
-
break;
|
|
498
|
-
}
|
|
499
|
-
lastId = cursor._id;
|
|
500
|
-
const mapped = (result?.buckets ?? []).map((b) => {
|
|
501
|
-
// The numbers, specifically the bytes, could be a bigint. We convert to Number to allow calculating the ratios.
|
|
502
|
-
// BigInt precision is not needed here since it's just an estimate.
|
|
503
|
-
const updatedCount = b.estimate_since_compact?.count ?? 0;
|
|
504
|
-
const totalCount = (b.compacted_state?.count ?? 0) + updatedCount;
|
|
505
|
-
const updatedBytes = Number(b.estimate_since_compact?.bytes ?? 0);
|
|
506
|
-
const totalBytes = Number(b.compacted_state?.bytes ?? 0) + updatedBytes;
|
|
507
|
-
const dirtyChangeNumber = totalCount > 0 ? updatedCount / totalCount : 0;
|
|
508
|
-
const dirtyChangeBytes = totalBytes > 0 ? updatedBytes / totalBytes : 0;
|
|
509
|
-
return {
|
|
510
|
-
bucket: b._id.b,
|
|
511
|
-
estimatedCount: totalCount,
|
|
512
|
-
dirtyRatio: Math.max(dirtyChangeNumber, dirtyChangeBytes)
|
|
513
|
-
};
|
|
514
|
-
});
|
|
515
|
-
const filtered = mapped.filter((b) => b.estimatedCount >= options.minBucketChanges && b.dirtyRatio >= options.minChangeRatio);
|
|
516
|
-
yield filtered;
|
|
517
|
-
}
|
|
518
|
-
}
|
|
519
|
-
/**
|
|
520
|
-
* Returns a batch of dirty buckets - buckets with most changes first.
|
|
521
|
-
*
|
|
522
|
-
* This cannot be used to iterate on its own - the client is expected to process these buckets and
|
|
523
|
-
* set estimate_since_compact.count: 0 when done, before fetching the next batch.
|
|
524
|
-
*
|
|
525
|
-
* Unlike dirtyBucketBatches, used for compacting, this is specifically designed to be resuamble after a restart,
|
|
526
|
-
* since it is used as the last step for initial replication.
|
|
527
|
-
*
|
|
528
|
-
* We currently don't get new data while doing populateChecksums, so we don't need to worry about buckets changing while processing.
|
|
529
|
-
*/
|
|
530
|
-
async dirtyBucketBatchForChecksums(options) {
|
|
531
|
-
if (options.minBucketChanges <= 0) {
|
|
532
|
-
throw new ReplicationAssertionError('minBucketChanges must be >= 1');
|
|
533
|
-
}
|
|
534
|
-
// We make use of an index on {_id.g: 1, 'estimate_since_compact.count': -1}
|
|
535
|
-
const dirtyBuckets = await this.db.bucket_state
|
|
536
|
-
.find({
|
|
537
|
-
'_id.g': this.group_id,
|
|
538
|
-
'estimate_since_compact.count': { $gte: options.minBucketChanges }
|
|
539
|
-
}, {
|
|
540
|
-
projection: {
|
|
541
|
-
_id: 1,
|
|
542
|
-
estimate_since_compact: 1,
|
|
543
|
-
compacted_state: 1
|
|
544
|
-
},
|
|
545
|
-
sort: {
|
|
546
|
-
'estimate_since_compact.count': -1
|
|
547
|
-
},
|
|
548
|
-
limit: 200,
|
|
549
|
-
maxTimeMS: MONGO_OPERATION_TIMEOUT_MS
|
|
550
|
-
})
|
|
551
|
-
.toArray();
|
|
552
|
-
return dirtyBuckets.map((bucket) => ({
|
|
553
|
-
bucket: bucket._id.b,
|
|
554
|
-
estimatedCount: Number(bucket.estimate_since_compact.count) + Number(bucket.compacted_state?.count ?? 0)
|
|
555
|
-
}));
|
|
556
|
-
}
|
|
557
497
|
async updateChecksumsBatch(buckets) {
|
|
558
|
-
const checksums = await this.
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
source: {},
|
|
562
|
-
end: this.maxOpId
|
|
563
|
-
};
|
|
564
|
-
}));
|
|
565
|
-
for (let bucketChecksum of checksums.values()) {
|
|
498
|
+
const checksums = await this.computeChecksumsForBuckets(buckets);
|
|
499
|
+
const definitionIdByBucket = new Map(buckets.map((bucket) => [bucket.bucket, bucket.definitionId]));
|
|
500
|
+
for (const bucketChecksum of checksums.values()) {
|
|
566
501
|
if (isPartialChecksum(bucketChecksum)) {
|
|
567
|
-
// Should never happen since we don't specify `start
|
|
502
|
+
// Should never happen since we don't specify `start`.
|
|
568
503
|
throw new ServiceAssertionError(`Full checksum expected, got ${JSON.stringify(bucketChecksum)}`);
|
|
569
504
|
}
|
|
570
505
|
this.bucketStateUpdates.push({
|
|
571
506
|
updateOne: {
|
|
572
|
-
filter:
|
|
573
|
-
_id: {
|
|
574
|
-
g: this.group_id,
|
|
575
|
-
b: bucketChecksum.bucket
|
|
576
|
-
}
|
|
577
|
-
},
|
|
507
|
+
filter: this.bucketStateFilter(bucketChecksum.bucket, definitionIdByBucket.get(bucketChecksum.bucket) ?? null),
|
|
578
508
|
update: {
|
|
579
509
|
$set: {
|
|
580
510
|
compacted_state: {
|
|
@@ -589,13 +519,16 @@ export class MongoCompactor {
|
|
|
589
519
|
}
|
|
590
520
|
}
|
|
591
521
|
},
|
|
592
|
-
// We don't create new ones here - it gets tricky to get the last_op right with the unique index on
|
|
593
|
-
// bucket_updates
|
|
522
|
+
// We don't create new ones here - it gets tricky to get the last_op right with the unique index on
|
|
523
|
+
// bucket_updates.
|
|
594
524
|
upsert: false
|
|
595
525
|
}
|
|
596
526
|
});
|
|
597
527
|
}
|
|
598
|
-
await this.
|
|
528
|
+
await this.flushBucketStateUpdates();
|
|
529
|
+
}
|
|
530
|
+
formatBucketDataKey(doc) {
|
|
531
|
+
return `${doc.bucketKey.replicationStreamId}:${doc.bucketKey.bucket}:${doc.o}`;
|
|
599
532
|
}
|
|
600
533
|
}
|
|
601
534
|
//# sourceMappingURL=MongoCompactor.js.map
|