@nxtedition/rocksdb 7.0.24 → 7.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +12 -3
- package/deps/rocksdb/rocksdb/CMakeLists.txt +5 -0
- package/deps/rocksdb/rocksdb/Makefile +6 -2
- package/deps/rocksdb/rocksdb/TARGETS +14 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +4 -1
- package/deps/rocksdb/rocksdb/cache/cache_helpers.h +20 -0
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +2 -2
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +44 -31
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +491 -722
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +468 -2
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +51 -52
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +28 -16
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +12 -1
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +1 -0
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +170 -36
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +63 -36
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +4 -6
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +57 -38
- package/deps/rocksdb/rocksdb/db/blob/blob_read_request.h +58 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +164 -74
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +42 -29
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +419 -62
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +208 -8
- package/deps/rocksdb/rocksdb/db/c.cc +68 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +95 -2
- package/deps/rocksdb/rocksdb/db/column_family.cc +12 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +92 -15
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +76 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +52 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +30 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +126 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +203 -1584
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +93 -26
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +87 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +314 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +328 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +32 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +4 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +7 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +174 -33
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +474 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +825 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_state.cc +46 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_state.h +42 -0
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +223 -0
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +255 -0
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +1253 -0
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +32 -8
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +3 -1
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +13 -8
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +376 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +103 -78
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +4 -6
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +0 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +10 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +21 -6
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +19 -1
- package/deps/rocksdb/rocksdb/db/db_iter.cc +91 -14
- package/deps/rocksdb/rocksdb/db/db_iter.h +5 -0
- package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +33 -0
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +79 -0
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +5 -2
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +185 -0
- package/deps/rocksdb/rocksdb/db/dbformat.cc +1 -4
- package/deps/rocksdb/rocksdb/db/dbformat.h +2 -8
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +71 -29
- package/deps/rocksdb/rocksdb/db/internal_stats.h +160 -5
- package/deps/rocksdb/rocksdb/db/log_reader.cc +29 -3
- package/deps/rocksdb/rocksdb/db/log_reader.h +12 -3
- package/deps/rocksdb/rocksdb/db/repair_test.cc +1 -3
- package/deps/rocksdb/rocksdb/db/version_edit.cc +6 -0
- package/deps/rocksdb/rocksdb/db/version_set.cc +93 -129
- package/deps/rocksdb/rocksdb/db/version_set.h +4 -4
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +2 -2
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +42 -35
- package/deps/rocksdb/rocksdb/db/write_batch.cc +10 -2
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +4 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +10 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +3 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +3 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +5 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +140 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +12 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +46 -7
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +7 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +27 -7
- package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +8 -0
- package/deps/rocksdb/rocksdb/env/env_posix.cc +14 -0
- package/deps/rocksdb/rocksdb/env/env_test.cc +130 -1
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +7 -1
- package/deps/rocksdb/rocksdb/env/io_posix.cc +18 -50
- package/deps/rocksdb/rocksdb/env/io_posix.h +53 -6
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +8 -10
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +3 -7
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +239 -259
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +84 -19
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +24 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +31 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +11 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +20 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +37 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/threadpool.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +13 -13
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +12 -2
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +38 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +7 -1
- package/deps/rocksdb/rocksdb/port/win/env_win.cc +17 -0
- package/deps/rocksdb/rocksdb/port/win/env_win.h +8 -0
- package/deps/rocksdb/rocksdb/port/win/io_win.cc +6 -3
- package/deps/rocksdb/rocksdb/src.mk +5 -0
- package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +15 -12
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +5 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -4
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +1 -2
- package/deps/rocksdb/rocksdb/table/get_context.cc +1 -0
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -2
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +24 -4
- package/deps/rocksdb/rocksdb/util/async_file_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/util/compression.h +2 -0
- package/deps/rocksdb/rocksdb/util/thread_list_test.cc +18 -1
- package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +67 -4
- package/deps/rocksdb/rocksdb/util/threadpool_imp.h +8 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +15 -12
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -2
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb.gyp +5 -1
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
//
|
|
3
|
+
// This source code is licensed under both the GPLv2 (found in the
|
|
4
|
+
// COPYING file in the root directory) and Apache 2.0 License
|
|
5
|
+
// (found in the LICENSE.Apache file in the root directory).
|
|
6
|
+
//
|
|
7
|
+
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
8
|
+
// Use of this source code is governed by a BSD-style license that can be
|
|
9
|
+
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
10
|
+
|
|
11
|
+
#pragma once
|
|
12
|
+
|
|
13
|
+
#include "db/blob/blob_garbage_meter.h"
|
|
14
|
+
#include "db/compaction/compaction.h"
|
|
15
|
+
#include "db/compaction/compaction_iterator.h"
|
|
16
|
+
#include "db/internal_stats.h"
|
|
17
|
+
#include "db/output_validator.h"
|
|
18
|
+
|
|
19
|
+
namespace ROCKSDB_NAMESPACE {
|
|
20
|
+
|
|
21
|
+
// Forward declaration so the callback aliases below can name the class
// before it is defined.
class CompactionOutputs;
|
|
22
|
+
// Callback invoked with the CompactionOutputs that needs a new output file;
// returns a Status describing whether the open succeeded.
using CompactionFileOpenFunc = std::function<Status(CompactionOutputs&)>;
|
|
23
|
+
// Callback invoked to close the current output file of the given
// CompactionOutputs. It receives the status accumulated so far and a Slice
// key, and returns the Status of the close itself.
using CompactionFileCloseFunc =
|
|
24
|
+
std::function<Status(CompactionOutputs&, const Status&, const Slice&)>;
|
|
25
|
+
|
|
26
|
+
// Files produced by subcompaction, most of the functions are used by
|
|
27
|
+
// compaction_job Open/Close compaction file functions.
|
|
28
|
+
class CompactionOutputs {
|
|
29
|
+
public:
|
|
30
|
+
// compaction output file
|
|
31
|
+
struct Output {
|
|
32
|
+
Output(FileMetaData&& _meta, const InternalKeyComparator& _icmp,
|
|
33
|
+
bool _enable_order_check, bool _enable_hash, bool _finished,
|
|
34
|
+
uint64_t precalculated_hash)
|
|
35
|
+
: meta(std::move(_meta)),
|
|
36
|
+
validator(_icmp, _enable_order_check, _enable_hash,
|
|
37
|
+
precalculated_hash),
|
|
38
|
+
finished(_finished) {}
|
|
39
|
+
FileMetaData meta;
|
|
40
|
+
OutputValidator validator;
|
|
41
|
+
bool finished;
|
|
42
|
+
std::shared_ptr<const TableProperties> table_properties;
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
CompactionOutputs() = delete;
|
|
46
|
+
|
|
47
|
+
explicit CompactionOutputs(const Compaction* compaction,
|
|
48
|
+
const bool is_penultimate_level)
|
|
49
|
+
: compaction_(compaction), is_penultimate_level_(is_penultimate_level) {
|
|
50
|
+
partitioner_ = compaction->output_level() == 0
|
|
51
|
+
? nullptr
|
|
52
|
+
: compaction->CreateSstPartitioner();
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// Add generated output to the list
|
|
56
|
+
void AddOutput(FileMetaData&& meta, const InternalKeyComparator& icmp,
|
|
57
|
+
bool enable_order_check, bool enable_hash,
|
|
58
|
+
bool finished = false, uint64_t precalculated_hash = 0) {
|
|
59
|
+
outputs_.emplace_back(std::move(meta), icmp, enable_order_check,
|
|
60
|
+
enable_hash, finished, precalculated_hash);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// Set new table builder for the current output
|
|
64
|
+
void NewBuilder(const TableBuilderOptions& tboptions);
|
|
65
|
+
|
|
66
|
+
// Assign a new WritableFileWriter to the current output
|
|
67
|
+
void AssignFileWriter(WritableFileWriter* writer) {
|
|
68
|
+
file_writer_.reset(writer);
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// TODO: Remove it when remote compaction support tiered compaction
|
|
72
|
+
void SetTotalBytes(uint64_t bytes) { stats_.bytes_written += bytes; }
|
|
73
|
+
void SetNumOutputRecords(uint64_t num) { stats_.num_output_records = num; }
|
|
74
|
+
|
|
75
|
+
// TODO: Move the BlobDB builder into CompactionOutputs
|
|
76
|
+
const std::vector<BlobFileAddition>& GetBlobFileAdditions() const {
|
|
77
|
+
if (is_penultimate_level_) {
|
|
78
|
+
assert(blob_file_additions_.empty());
|
|
79
|
+
}
|
|
80
|
+
return blob_file_additions_;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
std::vector<BlobFileAddition>* GetBlobFileAdditionsPtr() {
|
|
84
|
+
assert(!is_penultimate_level_);
|
|
85
|
+
return &blob_file_additions_;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
bool HasBlobFileAdditions() const { return !blob_file_additions_.empty(); }
|
|
89
|
+
|
|
90
|
+
BlobGarbageMeter* CreateBlobGarbageMeter() {
|
|
91
|
+
assert(!is_penultimate_level_);
|
|
92
|
+
blob_garbage_meter_ = std::make_unique<BlobGarbageMeter>();
|
|
93
|
+
return blob_garbage_meter_.get();
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
BlobGarbageMeter* GetBlobGarbageMeter() const {
|
|
97
|
+
if (is_penultimate_level_) {
|
|
98
|
+
// blobdb doesn't support per_key_placement yet
|
|
99
|
+
assert(blob_garbage_meter_ == nullptr);
|
|
100
|
+
return nullptr;
|
|
101
|
+
}
|
|
102
|
+
return blob_garbage_meter_.get();
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
void UpdateBlobStats() {
|
|
106
|
+
assert(!is_penultimate_level_);
|
|
107
|
+
stats_.num_output_files_blob = blob_file_additions_.size();
|
|
108
|
+
for (const auto& blob : blob_file_additions_) {
|
|
109
|
+
stats_.bytes_written_blob += blob.GetTotalBlobBytes();
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// Finish the current output file
|
|
114
|
+
Status Finish(const Status& intput_status);
|
|
115
|
+
|
|
116
|
+
// Update output table properties from table builder
|
|
117
|
+
void UpdateTableProperties() {
|
|
118
|
+
current_output().table_properties =
|
|
119
|
+
std::make_shared<TableProperties>(GetTableProperties());
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
IOStatus WriterSyncClose(const Status& intput_status, SystemClock* clock,
|
|
123
|
+
Statistics* statistics, bool use_fsync);
|
|
124
|
+
|
|
125
|
+
TableProperties GetTableProperties() {
|
|
126
|
+
return builder_->GetTableProperties();
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
Slice SmallestUserKey() const {
|
|
130
|
+
if (!outputs_.empty() && outputs_[0].finished) {
|
|
131
|
+
return outputs_[0].meta.smallest.user_key();
|
|
132
|
+
} else {
|
|
133
|
+
return Slice{nullptr, 0};
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
Slice LargestUserKey() const {
|
|
138
|
+
if (!outputs_.empty() && outputs_.back().finished) {
|
|
139
|
+
return outputs_.back().meta.largest.user_key();
|
|
140
|
+
} else {
|
|
141
|
+
return Slice{nullptr, 0};
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
// In case the last output file is empty, which doesn't need to keep.
|
|
146
|
+
void RemoveLastEmptyOutput() {
|
|
147
|
+
if (!outputs_.empty() && !outputs_.back().meta.fd.file_size) {
|
|
148
|
+
// An error occurred, so ignore the last output.
|
|
149
|
+
outputs_.pop_back();
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
// Remove the last output, for example the last output doesn't have data (no
|
|
154
|
+
// entry and no range-dels), but file_size might not be 0, as it has SST
|
|
155
|
+
// metadata.
|
|
156
|
+
void RemoveLastOutput() {
|
|
157
|
+
assert(!outputs_.empty());
|
|
158
|
+
outputs_.pop_back();
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
bool HasBuilder() const { return builder_ != nullptr; }
|
|
162
|
+
|
|
163
|
+
FileMetaData* GetMetaData() { return ¤t_output().meta; }
|
|
164
|
+
|
|
165
|
+
bool HasOutput() const { return !outputs_.empty(); }
|
|
166
|
+
|
|
167
|
+
uint64_t NumEntries() const { return builder_->NumEntries(); }
|
|
168
|
+
|
|
169
|
+
void ResetBuilder() {
|
|
170
|
+
builder_.reset();
|
|
171
|
+
current_output_file_size_ = 0;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
// Add range-dels from the aggregator to the current output file
|
|
175
|
+
Status AddRangeDels(const Slice* comp_start, const Slice* comp_end,
|
|
176
|
+
CompactionIterationStats& range_del_out_stats,
|
|
177
|
+
bool bottommost_level, const InternalKeyComparator& icmp,
|
|
178
|
+
SequenceNumber earliest_snapshot,
|
|
179
|
+
const Slice& next_table_min_key);
|
|
180
|
+
|
|
181
|
+
// Is the current file is already pending for close
|
|
182
|
+
bool IsPendingClose() const { return pending_close_; }
|
|
183
|
+
|
|
184
|
+
// Current file should close before adding a new key
|
|
185
|
+
void SetPendingClose() { pending_close_ = true; }
|
|
186
|
+
|
|
187
|
+
// if the outputs have range delete, range delete is also data
|
|
188
|
+
bool HasRangeDel() const {
|
|
189
|
+
return range_del_agg_ && !range_del_agg_->IsEmpty();
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
private:
|
|
193
|
+
friend class SubcompactionState;
|
|
194
|
+
|
|
195
|
+
void Cleanup() {
|
|
196
|
+
if (builder_ != nullptr) {
|
|
197
|
+
// May happen if we get a shutdown call in the middle of compaction
|
|
198
|
+
builder_->Abandon();
|
|
199
|
+
builder_.reset();
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
uint64_t GetCurrentOutputFileSize() const {
|
|
204
|
+
return current_output_file_size_;
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
// Add curent key from compaction_iterator to the output file. If needed
|
|
208
|
+
// close and open new compaction output with the functions provided.
|
|
209
|
+
Status AddToOutput(const CompactionIterator& c_iter,
|
|
210
|
+
const CompactionFileOpenFunc& open_file_func,
|
|
211
|
+
const CompactionFileCloseFunc& close_file_func);
|
|
212
|
+
|
|
213
|
+
// Close the current output. `open_file_func` is needed for creating new file
|
|
214
|
+
// for range-dels only output file.
|
|
215
|
+
Status CloseOutput(const Status& curr_status,
|
|
216
|
+
const CompactionFileOpenFunc& open_file_func,
|
|
217
|
+
const CompactionFileCloseFunc& close_file_func) {
|
|
218
|
+
Status status = curr_status;
|
|
219
|
+
// handle subcompaction containing only range deletions
|
|
220
|
+
if (status.ok() && !HasBuilder() && !HasOutput() && HasRangeDel()) {
|
|
221
|
+
status = open_file_func(*this);
|
|
222
|
+
}
|
|
223
|
+
if (HasBuilder()) {
|
|
224
|
+
const Slice empty_key{};
|
|
225
|
+
Status s = close_file_func(*this, status, empty_key);
|
|
226
|
+
if (!s.ok() && status.ok()) {
|
|
227
|
+
status = s;
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
return status;
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
// This subcompaction's output could be empty if compaction was aborted before
|
|
235
|
+
// this subcompaction had a chance to generate any output files. When
|
|
236
|
+
// subcompactions are executed sequentially this is more likely and will be
|
|
237
|
+
// particularly likely for the later subcompactions to be empty. Once they are
|
|
238
|
+
// run in parallel however it should be much rarer.
|
|
239
|
+
// It's caller's responsibility to make sure it's not empty.
|
|
240
|
+
Output& current_output() {
|
|
241
|
+
assert(!outputs_.empty());
|
|
242
|
+
return outputs_.back();
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
// Assign the range_del_agg to the target output level. There's only one
|
|
246
|
+
// range-del-aggregator per compaction outputs, for
|
|
247
|
+
// output_to_penultimate_level compaction it is only assigned to the
|
|
248
|
+
// penultimate level.
|
|
249
|
+
void AssignRangeDelAggregator(
|
|
250
|
+
std::unique_ptr<CompactionRangeDelAggregator>&& range_del_agg) {
|
|
251
|
+
assert(range_del_agg_ == nullptr);
|
|
252
|
+
range_del_agg_ = std::move(range_del_agg);
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
const Compaction* compaction_;
|
|
256
|
+
|
|
257
|
+
// The current file is pending close, which needs to run `close_file_func()`
|
|
258
|
+
// first to add a new key.
|
|
259
|
+
bool pending_close_ = false;
|
|
260
|
+
|
|
261
|
+
// current output builder and writer
|
|
262
|
+
std::unique_ptr<TableBuilder> builder_;
|
|
263
|
+
std::unique_ptr<WritableFileWriter> file_writer_;
|
|
264
|
+
uint64_t current_output_file_size_ = 0;
|
|
265
|
+
|
|
266
|
+
// all the compaction outputs so far
|
|
267
|
+
std::vector<Output> outputs_;
|
|
268
|
+
|
|
269
|
+
// BlobDB info
|
|
270
|
+
std::vector<BlobFileAddition> blob_file_additions_;
|
|
271
|
+
std::unique_ptr<BlobGarbageMeter> blob_garbage_meter_;
|
|
272
|
+
|
|
273
|
+
// Basic compaction output stats for this level's outputs
|
|
274
|
+
InternalStats::CompactionOutputsStats stats_;
|
|
275
|
+
|
|
276
|
+
// indicate if this CompactionOutputs obj for penultimate_level, should always
|
|
277
|
+
// be false if per_key_placement feature is not enabled.
|
|
278
|
+
const bool is_penultimate_level_;
|
|
279
|
+
std::unique_ptr<CompactionRangeDelAggregator> range_del_agg_ = nullptr;
|
|
280
|
+
|
|
281
|
+
// partitioner information
|
|
282
|
+
std::string last_key_for_partitioner_;
|
|
283
|
+
std::unique_ptr<SstPartitioner> partitioner_;
|
|
284
|
+
};
|
|
285
|
+
|
|
286
|
+
// helper struct to concatenate the last level and penultimate level outputs
|
|
287
|
+
// which could be replaced by std::ranges::join_view() in c++20
|
|
288
|
+
struct OutputIterator {
|
|
289
|
+
public:
|
|
290
|
+
explicit OutputIterator(const std::vector<CompactionOutputs::Output>& a,
|
|
291
|
+
const std::vector<CompactionOutputs::Output>& b)
|
|
292
|
+
: a_(a), b_(b) {
|
|
293
|
+
within_a = !a_.empty();
|
|
294
|
+
idx_ = 0;
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
OutputIterator begin() { return *this; }
|
|
298
|
+
|
|
299
|
+
OutputIterator end() { return *this; }
|
|
300
|
+
|
|
301
|
+
size_t size() { return a_.size() + b_.size(); }
|
|
302
|
+
|
|
303
|
+
const CompactionOutputs::Output& operator*() const {
|
|
304
|
+
return within_a ? a_[idx_] : b_[idx_];
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
OutputIterator& operator++() {
|
|
308
|
+
idx_++;
|
|
309
|
+
if (within_a && idx_ >= a_.size()) {
|
|
310
|
+
within_a = false;
|
|
311
|
+
idx_ = 0;
|
|
312
|
+
}
|
|
313
|
+
assert(within_a || idx_ <= b_.size());
|
|
314
|
+
return *this;
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
bool operator!=(const OutputIterator& /*rhs*/) const {
|
|
318
|
+
return within_a || idx_ < b_.size();
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
private:
|
|
322
|
+
const std::vector<CompactionOutputs::Output>& a_;
|
|
323
|
+
const std::vector<CompactionOutputs::Output>& b_;
|
|
324
|
+
bool within_a;
|
|
325
|
+
size_t idx_;
|
|
326
|
+
};
|
|
327
|
+
|
|
328
|
+
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -214,13 +214,13 @@ void CompactionPicker::GetRange(const CompactionInputFiles& inputs1,
|
|
|
214
214
|
}
|
|
215
215
|
|
|
216
216
|
void CompactionPicker::GetRange(const std::vector<CompactionInputFiles>& inputs,
|
|
217
|
-
InternalKey* smallest,
|
|
218
|
-
|
|
217
|
+
InternalKey* smallest, InternalKey* largest,
|
|
218
|
+
int exclude_level) const {
|
|
219
219
|
InternalKey current_smallest;
|
|
220
220
|
InternalKey current_largest;
|
|
221
221
|
bool initialized = false;
|
|
222
222
|
for (const auto& in : inputs) {
|
|
223
|
-
if (in.empty()) {
|
|
223
|
+
if (in.empty() || in.level == exclude_level) {
|
|
224
224
|
continue;
|
|
225
225
|
}
|
|
226
226
|
GetRange(in, &current_smallest, &current_largest);
|
|
@@ -293,6 +293,12 @@ bool CompactionPicker::RangeOverlapWithCompaction(
|
|
|
293
293
|
// Overlap
|
|
294
294
|
return true;
|
|
295
295
|
}
|
|
296
|
+
if (c->SupportsPerKeyPlacement()) {
|
|
297
|
+
if (c->OverlapPenultimateLevelOutputRange(smallest_user_key,
|
|
298
|
+
largest_user_key)) {
|
|
299
|
+
return true;
|
|
300
|
+
}
|
|
301
|
+
}
|
|
296
302
|
}
|
|
297
303
|
// Did not overlap with any running compaction in level `level`
|
|
298
304
|
return false;
|
|
@@ -301,9 +307,11 @@ bool CompactionPicker::RangeOverlapWithCompaction(
|
|
|
301
307
|
bool CompactionPicker::FilesRangeOverlapWithCompaction(
|
|
302
308
|
const std::vector<CompactionInputFiles>& inputs, int level) const {
|
|
303
309
|
bool is_empty = true;
|
|
310
|
+
int start_level = -1;
|
|
304
311
|
for (auto& in : inputs) {
|
|
305
312
|
if (!in.empty()) {
|
|
306
313
|
is_empty = false;
|
|
314
|
+
start_level = in.level; // inputs are sorted by level
|
|
307
315
|
break;
|
|
308
316
|
}
|
|
309
317
|
}
|
|
@@ -313,7 +321,19 @@ bool CompactionPicker::FilesRangeOverlapWithCompaction(
|
|
|
313
321
|
}
|
|
314
322
|
|
|
315
323
|
InternalKey smallest, largest;
|
|
316
|
-
GetRange(inputs, &smallest, &largest);
|
|
324
|
+
GetRange(inputs, &smallest, &largest, Compaction::kInvalidLevel);
|
|
325
|
+
int penultimate_level =
|
|
326
|
+
Compaction::EvaluatePenultimateLevel(ioptions_, start_level, level);
|
|
327
|
+
if (penultimate_level != Compaction::kInvalidLevel) {
|
|
328
|
+
InternalKey penultimate_smallest, penultimate_largest;
|
|
329
|
+
GetRange(inputs, &penultimate_smallest, &penultimate_largest, level);
|
|
330
|
+
if (RangeOverlapWithCompaction(penultimate_smallest.user_key(),
|
|
331
|
+
penultimate_largest.user_key(),
|
|
332
|
+
penultimate_level)) {
|
|
333
|
+
return true;
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
|
|
317
337
|
return RangeOverlapWithCompaction(smallest.user_key(), largest.user_key(),
|
|
318
338
|
level);
|
|
319
339
|
}
|
|
@@ -543,6 +563,10 @@ bool CompactionPicker::SetupOtherInputs(
|
|
|
543
563
|
output_level_inputs_size);
|
|
544
564
|
inputs->files = expanded_inputs.files;
|
|
545
565
|
}
|
|
566
|
+
} else {
|
|
567
|
+
// Likely to be trivial move. Expand files if they are still trivial moves,
|
|
568
|
+
// but limit to mutable_cf_options.max_compaction_bytes or 8 files so that
|
|
569
|
+
// we don't create too much compaction pressure for the next level.
|
|
546
570
|
}
|
|
547
571
|
return true;
|
|
548
572
|
}
|
|
@@ -641,7 +665,8 @@ Compaction* CompactionPicker::CompactRange(
|
|
|
641
665
|
GetCompressionOptions(mutable_cf_options, vstorage, output_level),
|
|
642
666
|
Temperature::kUnknown, compact_range_options.max_subcompactions,
|
|
643
667
|
/* grandparents */ {}, /* is manual */ true, trim_ts, /* score */ -1,
|
|
644
|
-
/* deletion_compaction */ false,
|
|
668
|
+
/* deletion_compaction */ false, /* l0_files_might_overlap */ true,
|
|
669
|
+
CompactionReason::kUnknown,
|
|
645
670
|
compact_range_options.blob_garbage_collection_policy,
|
|
646
671
|
compact_range_options.blob_garbage_collection_age_cutoff);
|
|
647
672
|
|
|
@@ -823,7 +848,8 @@ Compaction* CompactionPicker::CompactRange(
|
|
|
823
848
|
GetCompressionOptions(mutable_cf_options, vstorage, output_level),
|
|
824
849
|
Temperature::kUnknown, compact_range_options.max_subcompactions,
|
|
825
850
|
std::move(grandparents), /* is manual */ true, trim_ts, /* score */ -1,
|
|
826
|
-
/* deletion_compaction */ false,
|
|
851
|
+
/* deletion_compaction */ false, /* l0_files_might_overlap */ true,
|
|
852
|
+
CompactionReason::kUnknown,
|
|
827
853
|
compact_range_options.blob_garbage_collection_policy,
|
|
828
854
|
compact_range_options.blob_garbage_collection_age_cutoff);
|
|
829
855
|
|
|
@@ -154,7 +154,8 @@ class CompactionPicker {
|
|
|
154
154
|
// in *smallest, *largest.
|
|
155
155
|
// REQUIRES: inputs is not empty (at least one entry has one file)
|
|
156
156
|
void GetRange(const std::vector<CompactionInputFiles>& inputs,
|
|
157
|
-
InternalKey* smallest, InternalKey* largest
|
|
157
|
+
InternalKey* smallest, InternalKey* largest,
|
|
158
|
+
int exclude_level) const;
|
|
158
159
|
|
|
159
160
|
int NumberLevels() const { return ioptions_.num_levels; }
|
|
160
161
|
|
|
@@ -217,6 +218,8 @@ class CompactionPicker {
|
|
|
217
218
|
return &compactions_in_progress_;
|
|
218
219
|
}
|
|
219
220
|
|
|
221
|
+
const InternalKeyComparator* icmp() const { return icmp_; }
|
|
222
|
+
|
|
220
223
|
protected:
|
|
221
224
|
const ImmutableOptions& ioptions_;
|
|
222
225
|
|
|
@@ -116,7 +116,8 @@ Compaction* FIFOCompactionPicker::PickTTLCompaction(
|
|
|
116
116
|
mutable_cf_options.compression_opts, Temperature::kUnknown,
|
|
117
117
|
/* max_subcompactions */ 0, {}, /* is manual */ false,
|
|
118
118
|
/* trim_ts */ "", vstorage->CompactionScore(0),
|
|
119
|
-
/* is deletion compaction */ true,
|
|
119
|
+
/* is deletion compaction */ true, /* l0_files_might_overlap */ true,
|
|
120
|
+
CompactionReason::kFIFOTtl);
|
|
120
121
|
return c;
|
|
121
122
|
}
|
|
122
123
|
|
|
@@ -160,6 +161,7 @@ Compaction* FIFOCompactionPicker::PickSizeCompaction(
|
|
|
160
161
|
0 /* max_subcompactions */, {}, /* is manual */ false,
|
|
161
162
|
/* trim_ts */ "", vstorage->CompactionScore(0),
|
|
162
163
|
/* is deletion compaction */ false,
|
|
164
|
+
/* l0_files_might_overlap */ true,
|
|
163
165
|
CompactionReason::kFIFOReduceNumFiles);
|
|
164
166
|
return c;
|
|
165
167
|
}
|
|
@@ -209,7 +211,8 @@ Compaction* FIFOCompactionPicker::PickSizeCompaction(
|
|
|
209
211
|
mutable_cf_options.compression_opts, Temperature::kUnknown,
|
|
210
212
|
/* max_subcompactions */ 0, {}, /* is manual */ false,
|
|
211
213
|
/* trim_ts */ "", vstorage->CompactionScore(0),
|
|
212
|
-
/* is deletion compaction */ true,
|
|
214
|
+
/* is deletion compaction */ true,
|
|
215
|
+
/* l0_files_might_overlap */ true, CompactionReason::kFIFOMaxSize);
|
|
213
216
|
return c;
|
|
214
217
|
}
|
|
215
218
|
|
|
@@ -315,7 +318,8 @@ Compaction* FIFOCompactionPicker::PickCompactionToWarm(
|
|
|
315
318
|
Temperature::kWarm,
|
|
316
319
|
/* max_subcompactions */ 0, {}, /* is manual */ false, /* trim_ts */ "",
|
|
317
320
|
vstorage->CompactionScore(0),
|
|
318
|
-
/* is deletion compaction */ false,
|
|
321
|
+
/* is deletion compaction */ false, /* l0_files_might_overlap */ true,
|
|
322
|
+
CompactionReason::kChangeTemperature);
|
|
319
323
|
return c;
|
|
320
324
|
}
|
|
321
325
|
|