@nxtedition/rocksdb 7.0.24 → 7.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. package/binding.cc +12 -3
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +5 -0
  3. package/deps/rocksdb/rocksdb/Makefile +6 -2
  4. package/deps/rocksdb/rocksdb/TARGETS +14 -0
  5. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +4 -1
  6. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +20 -0
  7. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +2 -2
  8. package/deps/rocksdb/rocksdb/cache/cache_test.cc +44 -31
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +491 -722
  10. package/deps/rocksdb/rocksdb/cache/clock_cache.h +468 -2
  11. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -1
  12. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +51 -52
  13. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +28 -16
  14. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +12 -1
  15. package/deps/rocksdb/rocksdb/cache/lru_cache.h +1 -0
  16. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +170 -36
  17. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +1 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +63 -36
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +4 -6
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +57 -38
  21. package/deps/rocksdb/rocksdb/db/blob/blob_read_request.h +58 -0
  22. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +164 -74
  23. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +42 -29
  24. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +419 -62
  25. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +208 -8
  26. package/deps/rocksdb/rocksdb/db/c.cc +68 -0
  27. package/deps/rocksdb/rocksdb/db/c_test.c +95 -2
  28. package/deps/rocksdb/rocksdb/db/column_family.cc +12 -3
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +92 -15
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +76 -4
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +52 -1
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +30 -1
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +126 -0
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +203 -1584
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +93 -26
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +87 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +314 -0
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +328 -0
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +32 -6
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +4 -1
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +7 -3
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +174 -33
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +474 -7
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -2
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +825 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_state.cc +46 -0
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_state.h +42 -0
  48. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +223 -0
  49. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +255 -0
  50. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +1253 -0
  51. package/deps/rocksdb/rocksdb/db/corruption_test.cc +32 -8
  52. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +3 -1
  53. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +13 -8
  54. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +376 -0
  55. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +103 -78
  56. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +4 -6
  57. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +0 -8
  58. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +10 -3
  59. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +21 -6
  60. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +19 -1
  61. package/deps/rocksdb/rocksdb/db/db_iter.cc +91 -14
  62. package/deps/rocksdb/rocksdb/db/db_iter.h +5 -0
  63. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +33 -0
  64. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +79 -0
  65. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +2 -0
  66. package/deps/rocksdb/rocksdb/db/db_test2.cc +1 -1
  67. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +5 -2
  68. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +185 -0
  69. package/deps/rocksdb/rocksdb/db/dbformat.cc +1 -4
  70. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -8
  71. package/deps/rocksdb/rocksdb/db/internal_stats.cc +71 -29
  72. package/deps/rocksdb/rocksdb/db/internal_stats.h +160 -5
  73. package/deps/rocksdb/rocksdb/db/log_reader.cc +29 -3
  74. package/deps/rocksdb/rocksdb/db/log_reader.h +12 -3
  75. package/deps/rocksdb/rocksdb/db/repair_test.cc +1 -3
  76. package/deps/rocksdb/rocksdb/db/version_edit.cc +6 -0
  77. package/deps/rocksdb/rocksdb/db/version_set.cc +93 -129
  78. package/deps/rocksdb/rocksdb/db/version_set.h +4 -4
  79. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +2 -2
  80. package/deps/rocksdb/rocksdb/db/version_set_test.cc +42 -35
  81. package/deps/rocksdb/rocksdb/db/write_batch.cc +10 -2
  82. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +4 -1
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +10 -4
  84. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +3 -3
  85. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +3 -2
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +5 -1
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +140 -8
  89. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +12 -0
  90. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +46 -7
  91. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +7 -0
  92. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +27 -7
  93. package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +8 -0
  94. package/deps/rocksdb/rocksdb/env/env_posix.cc +14 -0
  95. package/deps/rocksdb/rocksdb/env/env_test.cc +130 -1
  96. package/deps/rocksdb/rocksdb/env/fs_posix.cc +7 -1
  97. package/deps/rocksdb/rocksdb/env/io_posix.cc +18 -50
  98. package/deps/rocksdb/rocksdb/env/io_posix.h +53 -6
  99. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +8 -10
  100. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +3 -7
  101. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +239 -259
  102. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +84 -19
  103. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +24 -4
  104. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +1 -1
  105. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +31 -1
  106. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +11 -7
  107. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +2 -0
  108. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +14 -0
  109. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +20 -0
  110. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +37 -13
  111. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +7 -0
  112. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +14 -0
  113. package/deps/rocksdb/rocksdb/include/rocksdb/threadpool.h +9 -0
  114. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +13 -13
  115. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +12 -2
  116. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +38 -0
  117. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +7 -1
  118. package/deps/rocksdb/rocksdb/port/win/env_win.cc +17 -0
  119. package/deps/rocksdb/rocksdb/port/win/env_win.h +8 -0
  120. package/deps/rocksdb/rocksdb/port/win/io_win.cc +6 -3
  121. package/deps/rocksdb/rocksdb/src.mk +5 -0
  122. package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -2
  123. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1 -1
  124. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +5 -2
  125. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +1 -1
  126. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +15 -12
  127. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +5 -4
  128. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +2 -1
  129. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +1 -1
  130. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -4
  131. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +1 -2
  132. package/deps/rocksdb/rocksdb/table/get_context.cc +1 -0
  133. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -2
  134. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +24 -4
  135. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +1 -1
  136. package/deps/rocksdb/rocksdb/util/compression.h +2 -0
  137. package/deps/rocksdb/rocksdb/util/thread_list_test.cc +18 -1
  138. package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +67 -4
  139. package/deps/rocksdb/rocksdb/util/threadpool_imp.h +8 -0
  140. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +15 -12
  141. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -2
  142. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +1 -1
  143. package/deps/rocksdb/rocksdb.gyp +5 -1
  144. package/package.json +1 -1
  145. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  146. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -0,0 +1,328 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ //
3
+ // This source code is licensed under both the GPLv2 (found in the
4
+ // COPYING file in the root directory) and Apache 2.0 License
5
+ // (found in the LICENSE.Apache file in the root directory).
6
+ //
7
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
8
+ // Use of this source code is governed by a BSD-style license that can be
9
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
10
+
11
+ #pragma once
12
+
13
+ #include "db/blob/blob_garbage_meter.h"
14
+ #include "db/compaction/compaction.h"
15
+ #include "db/compaction/compaction_iterator.h"
16
+ #include "db/internal_stats.h"
17
+ #include "db/output_validator.h"
18
+
19
+ namespace ROCKSDB_NAMESPACE {
20
+
21
+ class CompactionOutputs;  // forward declaration; the callback types below take it by reference
22
+ using CompactionFileOpenFunc = std::function<Status(CompactionOutputs&)>;  // callback used to open a new output file for the given CompactionOutputs
23
+ using CompactionFileCloseFunc =
24
+ std::function<Status(CompactionOutputs&, const Status&, const Slice&)>;  // callback used to close the current output file; gets the status so far and a key Slice (CloseOutput() passes an empty one)
25
+
26
+ // Files produced by a subcompaction, together with the builder/writer state of
27
+ // the file currently being written. Most of the functions are used by compaction_job's Open/Close compaction file functions.
28
+ class CompactionOutputs {
29
+ public:
30
+ // A single compaction output file: its metadata, output validator, finished flag and table properties
31
+ struct Output {
32
+ Output(FileMetaData&& _meta, const InternalKeyComparator& _icmp,
33
+ bool _enable_order_check, bool _enable_hash, bool _finished,
34
+ uint64_t precalculated_hash)
35
+ : meta(std::move(_meta)),
36
+ validator(_icmp, _enable_order_check, _enable_hash,
37
+ precalculated_hash),
38
+ finished(_finished) {}
39
+ FileMetaData meta;
40
+ OutputValidator validator;
41
+ bool finished;
42
+ std::shared_ptr<const TableProperties> table_properties;  // not set by the constructor; see UpdateTableProperties()
43
+ };
44
+
45
+ CompactionOutputs() = delete;
46
+
47
+ explicit CompactionOutputs(const Compaction* compaction,
48
+ const bool is_penultimate_level)
49
+ : compaction_(compaction), is_penultimate_level_(is_penultimate_level) {
50
+ partitioner_ = compaction->output_level() == 0  // no SST partitioner is created when the output level is 0
51
+ ? nullptr
52
+ : compaction->CreateSstPartitioner();
53
+ }
54
+
55
+ // Append a newly generated output to the list; it becomes the current output
56
+ void AddOutput(FileMetaData&& meta, const InternalKeyComparator& icmp,
57
+ bool enable_order_check, bool enable_hash,
58
+ bool finished = false, uint64_t precalculated_hash = 0) {
59
+ outputs_.emplace_back(std::move(meta), icmp, enable_order_check,
60
+ enable_hash, finished, precalculated_hash);
61
+ }
62
+
63
+ // Set new table builder for the current output
64
+ void NewBuilder(const TableBuilderOptions& tboptions);
65
+
66
+ // Assign a new WritableFileWriter to the current output (takes ownership of `writer`)
67
+ void AssignFileWriter(WritableFileWriter* writer) {
68
+ file_writer_.reset(writer);
69
+ }
70
+
71
+ // TODO: Remove it when remote compaction supports tiered compaction
72
+ void SetTotalBytes(uint64_t bytes) { stats_.bytes_written += bytes; }  // NOTE: accumulates (+=) rather than overwriting
73
+ void SetNumOutputRecords(uint64_t num) { stats_.num_output_records = num; }
74
+
75
+ // TODO: Move the BlobDB builder into CompactionOutputs
76
+ const std::vector<BlobFileAddition>& GetBlobFileAdditions() const {
77
+ if (is_penultimate_level_) {
78
+ assert(blob_file_additions_.empty());
79
+ }
80
+ return blob_file_additions_;
81
+ }
82
+
83
+ std::vector<BlobFileAddition>* GetBlobFileAdditionsPtr() {
84
+ assert(!is_penultimate_level_);
85
+ return &blob_file_additions_;
86
+ }
87
+
88
+ bool HasBlobFileAdditions() const { return !blob_file_additions_.empty(); }
89
+
90
+ BlobGarbageMeter* CreateBlobGarbageMeter() {
91
+ assert(!is_penultimate_level_);
92
+ blob_garbage_meter_ = std::make_unique<BlobGarbageMeter>();  // replaces any previously created meter
93
+ return blob_garbage_meter_.get();
94
+ }
95
+
96
+ BlobGarbageMeter* GetBlobGarbageMeter() const {
97
+ if (is_penultimate_level_) {
98
+ // blobdb doesn't support per_key_placement yet
99
+ assert(blob_garbage_meter_ == nullptr);
100
+ return nullptr;
101
+ }
102
+ return blob_garbage_meter_.get();
103
+ }
104
+
105
+ void UpdateBlobStats() {
106
+ assert(!is_penultimate_level_);
107
+ stats_.num_output_files_blob = blob_file_additions_.size();
108
+ for (const auto& blob : blob_file_additions_) {
109
+ stats_.bytes_written_blob += blob.GetTotalBlobBytes();  // NOTE: accumulates, while the file count above is overwritten
110
+ }
111
+ }
112
+
113
+ // Finish the current output file
114
+ Status Finish(const Status& intput_status);
115
+
116
+ // Update output table properties from table builder (stores a copy on the current output)
117
+ void UpdateTableProperties() {
118
+ current_output().table_properties =
119
+ std::make_shared<TableProperties>(GetTableProperties());
120
+ }
121
+
122
+ IOStatus WriterSyncClose(const Status& intput_status, SystemClock* clock,
123
+ Statistics* statistics, bool use_fsync);
124
+
125
+ TableProperties GetTableProperties() {
126
+ return builder_->GetTableProperties();  // dereferences builder_ unchecked; see HasBuilder()
127
+ }
128
+
129
+ Slice SmallestUserKey() const {
130
+ if (!outputs_.empty() && outputs_[0].finished) {
131
+ return outputs_[0].meta.smallest.user_key();
132
+ } else {
133
+ return Slice{nullptr, 0};  // no outputs, or the first one is not finished
134
+ }
135
+ }
136
+
137
+ Slice LargestUserKey() const {
138
+ if (!outputs_.empty() && outputs_.back().finished) {
139
+ return outputs_.back().meta.largest.user_key();
140
+ } else {
141
+ return Slice{nullptr, 0};  // no outputs, or the last one is not finished
142
+ }
143
+ }
144
+
145
+ // Drop the last output if its file size is 0, as an empty file does not need to be kept.
146
+ void RemoveLastEmptyOutput() {
147
+ if (!outputs_.empty() && !outputs_.back().meta.fd.file_size) {
148
+ // An error occurred, so ignore the last output.
149
+ outputs_.pop_back();
150
+ }
151
+ }
152
+
153
+ // Remove the last output unconditionally, for example when the last output doesn't have data (no
154
+ // entry and no range-dels), but file_size might not be 0, as it has SST
155
+ // metadata.
156
+ void RemoveLastOutput() {
157
+ assert(!outputs_.empty());
158
+ outputs_.pop_back();
159
+ }
160
+
161
+ bool HasBuilder() const { return builder_ != nullptr; }
162
+
163
+ FileMetaData* GetMetaData() { return &current_output().meta; }
164
+
165
+ bool HasOutput() const { return !outputs_.empty(); }
166
+
167
+ uint64_t NumEntries() const { return builder_->NumEntries(); }  // dereferences builder_ unchecked; see HasBuilder()
168
+
169
+ void ResetBuilder() {
170
+ builder_.reset();
171
+ current_output_file_size_ = 0;
172
+ }
173
+
174
+ // Add range-dels from the aggregator to the current output file
175
+ Status AddRangeDels(const Slice* comp_start, const Slice* comp_end,
176
+ CompactionIterationStats& range_del_out_stats,
177
+ bool bottommost_level, const InternalKeyComparator& icmp,
178
+ SequenceNumber earliest_snapshot,
179
+ const Slice& next_table_min_key);
180
+
181
+ // Whether the current file is already pending close
182
+ bool IsPendingClose() const { return pending_close_; }
183
+
184
+ // Current file should close before adding a new key
185
+ void SetPendingClose() { pending_close_ = true; }
186
+
187
+ // Whether the outputs have range deletions; range deletions also count as data
188
+ bool HasRangeDel() const {
189
+ return range_del_agg_ && !range_del_agg_->IsEmpty();
190
+ }
191
+
192
+ private:
193
+ friend class SubcompactionState;
194
+
195
+ void Cleanup() {
196
+ if (builder_ != nullptr) {
197
+ // May happen if we get a shutdown call in the middle of compaction
198
+ builder_->Abandon();
199
+ builder_.reset();
200
+ }
201
+ }
202
+
203
+ uint64_t GetCurrentOutputFileSize() const {
204
+ return current_output_file_size_;
205
+ }
206
+
207
+ // Add the current key from compaction_iterator to the output file. If needed,
208
+ // close and open new compaction output with the functions provided.
209
+ Status AddToOutput(const CompactionIterator& c_iter,
210
+ const CompactionFileOpenFunc& open_file_func,
211
+ const CompactionFileCloseFunc& close_file_func);
212
+
213
+ // Close the current output. `open_file_func` is needed for creating a new file
214
+ // for a range-dels-only output file.
215
+ Status CloseOutput(const Status& curr_status,
216
+ const CompactionFileOpenFunc& open_file_func,
217
+ const CompactionFileCloseFunc& close_file_func) {
218
+ Status status = curr_status;
219
+ // handle subcompaction containing only range deletions
220
+ if (status.ok() && !HasBuilder() && !HasOutput() && HasRangeDel()) {
221
+ status = open_file_func(*this);
222
+ }
223
+ if (HasBuilder()) {
224
+ const Slice empty_key{};
225
+ Status s = close_file_func(*this, status, empty_key);
226
+ if (!s.ok() && status.ok()) {  // keep the first error: a close failure only replaces an OK status
227
+ status = s;
228
+ }
229
+ }
230
+
231
+ return status;
232
+ }
233
+
234
+ // This subcompaction's output could be empty if compaction was aborted before
235
+ // this subcompaction had a chance to generate any output files. When
236
+ // subcompactions are executed sequentially this is more likely and will be
237
+ // particularly likely for the later subcompactions to be empty. Once they are
238
+ // run in parallel however it should be much rarer.
239
+ // It's the caller's responsibility to make sure it's not empty.
240
+ Output& current_output() {
241
+ assert(!outputs_.empty());
242
+ return outputs_.back();
243
+ }
244
+
245
+ // Assign the range_del_agg to the target output level. There's only one
246
+ // range-del-aggregator per compaction outputs, for
247
+ // output_to_penultimate_level compaction it is only assigned to the
248
+ // penultimate level.
249
+ void AssignRangeDelAggregator(
250
+ std::unique_ptr<CompactionRangeDelAggregator>&& range_del_agg) {
251
+ assert(range_del_agg_ == nullptr);
252
+ range_del_agg_ = std::move(range_del_agg);
253
+ }
254
+
255
+ const Compaction* compaction_;
256
+
257
+ // The current file is pending close, which needs to run `close_file_func()`
258
+ // first to add a new key.
259
+ bool pending_close_ = false;
260
+
261
+ // current output builder and writer
262
+ std::unique_ptr<TableBuilder> builder_;
263
+ std::unique_ptr<WritableFileWriter> file_writer_;
264
+ uint64_t current_output_file_size_ = 0;
265
+
266
+ // all the compaction outputs so far
267
+ std::vector<Output> outputs_;
268
+
269
+ // BlobDB info
270
+ std::vector<BlobFileAddition> blob_file_additions_;
271
+ std::unique_ptr<BlobGarbageMeter> blob_garbage_meter_;
272
+
273
+ // Basic compaction output stats for this level's outputs
274
+ InternalStats::CompactionOutputsStats stats_;
275
+
276
+ // indicates whether this CompactionOutputs obj is for the penultimate level; should always
277
+ // be false if per_key_placement feature is not enabled.
278
+ const bool is_penultimate_level_;
279
+ std::unique_ptr<CompactionRangeDelAggregator> range_del_agg_ = nullptr;
280
+
281
+ // partitioner information
282
+ std::string last_key_for_partitioner_;
283
+ std::unique_ptr<SstPartitioner> partitioner_;
284
+ };
285
+
286
+ // helper struct to iterate over two output vectors back to back (all of `a`, then all of `b`), e.g. to concatenate the last level and penultimate level outputs,
287
+ // which could be replaced by std::ranges::join_view() in c++20. NOTE: begin()/end() both return copies of *this and operator!= ignores its argument.
288
+ struct OutputIterator {
289
+ public:
290
+ explicit OutputIterator(const std::vector<CompactionOutputs::Output>& a,
291
+ const std::vector<CompactionOutputs::Output>& b)
292
+ : a_(a), b_(b) {
293
+ within_a = !a_.empty();  // start directly in `b` when `a` is empty
294
+ idx_ = 0;
295
+ }
296
+
297
+ OutputIterator begin() { return *this; }  // copy of this iterator at its current position
298
+
299
+ OutputIterator end() { return *this; }  // also a copy; operator!= never inspects it
300
+
301
+ size_t size() { return a_.size() + b_.size(); }  // total number of outputs in both vectors
302
+
303
+ const CompactionOutputs::Output& operator*() const {
304
+ return within_a ? a_[idx_] : b_[idx_];
305
+ }
306
+
307
+ OutputIterator& operator++() {
308
+ idx_++;
309
+ if (within_a && idx_ >= a_.size()) {  // finished `a`: continue from the start of `b`
310
+ within_a = false;
311
+ idx_ = 0;
312
+ }
313
+ assert(within_a || idx_ <= b_.size());
314
+ return *this;
315
+ }
316
+
317
+ bool operator!=(const OutputIterator& /*rhs*/) const {  // true while elements remain; the argument is ignored
318
+ return within_a || idx_ < b_.size();
319
+ }
320
+
321
+ private:
322
+ const std::vector<CompactionOutputs::Output>& a_;  // held by reference: the vectors must outlive this iterator
323
+ const std::vector<CompactionOutputs::Output>& b_;
324
+ bool within_a;  // true while iterating `a_`, false once in `b_`
325
+ size_t idx_;  // index into whichever vector is currently being iterated
326
+ };
327
+
328
+ } // namespace ROCKSDB_NAMESPACE
@@ -214,13 +214,13 @@ void CompactionPicker::GetRange(const CompactionInputFiles& inputs1,
214
214
  }
215
215
 
216
216
  void CompactionPicker::GetRange(const std::vector<CompactionInputFiles>& inputs,
217
- InternalKey* smallest,
218
- InternalKey* largest) const {
217
+ InternalKey* smallest, InternalKey* largest,
218
+ int exclude_level) const {
219
219
  InternalKey current_smallest;
220
220
  InternalKey current_largest;
221
221
  bool initialized = false;
222
222
  for (const auto& in : inputs) {
223
- if (in.empty()) {
223
+ if (in.empty() || in.level == exclude_level) {
224
224
  continue;
225
225
  }
226
226
  GetRange(in, &current_smallest, &current_largest);
@@ -293,6 +293,12 @@ bool CompactionPicker::RangeOverlapWithCompaction(
293
293
  // Overlap
294
294
  return true;
295
295
  }
296
+ if (c->SupportsPerKeyPlacement()) {
297
+ if (c->OverlapPenultimateLevelOutputRange(smallest_user_key,
298
+ largest_user_key)) {
299
+ return true;
300
+ }
301
+ }
296
302
  }
297
303
  // Did not overlap with any running compaction in level `level`
298
304
  return false;
@@ -301,9 +307,11 @@ bool CompactionPicker::RangeOverlapWithCompaction(
301
307
  bool CompactionPicker::FilesRangeOverlapWithCompaction(
302
308
  const std::vector<CompactionInputFiles>& inputs, int level) const {
303
309
  bool is_empty = true;
310
+ int start_level = -1;
304
311
  for (auto& in : inputs) {
305
312
  if (!in.empty()) {
306
313
  is_empty = false;
314
+ start_level = in.level; // inputs are sorted by level
307
315
  break;
308
316
  }
309
317
  }
@@ -313,7 +321,19 @@ bool CompactionPicker::FilesRangeOverlapWithCompaction(
313
321
  }
314
322
 
315
323
  InternalKey smallest, largest;
316
- GetRange(inputs, &smallest, &largest);
324
+ GetRange(inputs, &smallest, &largest, Compaction::kInvalidLevel);
325
+ int penultimate_level =
326
+ Compaction::EvaluatePenultimateLevel(ioptions_, start_level, level);
327
+ if (penultimate_level != Compaction::kInvalidLevel) {
328
+ InternalKey penultimate_smallest, penultimate_largest;
329
+ GetRange(inputs, &penultimate_smallest, &penultimate_largest, level);
330
+ if (RangeOverlapWithCompaction(penultimate_smallest.user_key(),
331
+ penultimate_largest.user_key(),
332
+ penultimate_level)) {
333
+ return true;
334
+ }
335
+ }
336
+
317
337
  return RangeOverlapWithCompaction(smallest.user_key(), largest.user_key(),
318
338
  level);
319
339
  }
@@ -543,6 +563,10 @@ bool CompactionPicker::SetupOtherInputs(
543
563
  output_level_inputs_size);
544
564
  inputs->files = expanded_inputs.files;
545
565
  }
566
+ } else {
567
+ // Likely to be trivial move. Expand files if they are still trivial moves,
568
+ // but limit to mutable_cf_options.max_compaction_bytes or 8 files so that
569
+ // we don't create too much compaction pressure for the next level.
546
570
  }
547
571
  return true;
548
572
  }
@@ -641,7 +665,8 @@ Compaction* CompactionPicker::CompactRange(
641
665
  GetCompressionOptions(mutable_cf_options, vstorage, output_level),
642
666
  Temperature::kUnknown, compact_range_options.max_subcompactions,
643
667
  /* grandparents */ {}, /* is manual */ true, trim_ts, /* score */ -1,
644
- /* deletion_compaction */ false, CompactionReason::kUnknown,
668
+ /* deletion_compaction */ false, /* l0_files_might_overlap */ true,
669
+ CompactionReason::kUnknown,
645
670
  compact_range_options.blob_garbage_collection_policy,
646
671
  compact_range_options.blob_garbage_collection_age_cutoff);
647
672
 
@@ -823,7 +848,8 @@ Compaction* CompactionPicker::CompactRange(
823
848
  GetCompressionOptions(mutable_cf_options, vstorage, output_level),
824
849
  Temperature::kUnknown, compact_range_options.max_subcompactions,
825
850
  std::move(grandparents), /* is manual */ true, trim_ts, /* score */ -1,
826
- /* deletion_compaction */ false, CompactionReason::kUnknown,
851
+ /* deletion_compaction */ false, /* l0_files_might_overlap */ true,
852
+ CompactionReason::kUnknown,
827
853
  compact_range_options.blob_garbage_collection_policy,
828
854
  compact_range_options.blob_garbage_collection_age_cutoff);
829
855
 
@@ -154,7 +154,8 @@ class CompactionPicker {
154
154
  // in *smallest, *largest.
155
155
  // REQUIRES: inputs is not empty (at least one entry has one file)
156
156
  void GetRange(const std::vector<CompactionInputFiles>& inputs,
157
- InternalKey* smallest, InternalKey* largest) const;
157
+ InternalKey* smallest, InternalKey* largest,
158
+ int exclude_level) const;
158
159
 
159
160
  int NumberLevels() const { return ioptions_.num_levels; }
160
161
 
@@ -217,6 +218,8 @@ class CompactionPicker {
217
218
  return &compactions_in_progress_;
218
219
  }
219
220
 
221
+ const InternalKeyComparator* icmp() const { return icmp_; }  // read-only accessor for the picker's icmp_ member
222
+
220
223
  protected:
221
224
  const ImmutableOptions& ioptions_;
222
225
 
@@ -116,7 +116,8 @@ Compaction* FIFOCompactionPicker::PickTTLCompaction(
116
116
  mutable_cf_options.compression_opts, Temperature::kUnknown,
117
117
  /* max_subcompactions */ 0, {}, /* is manual */ false,
118
118
  /* trim_ts */ "", vstorage->CompactionScore(0),
119
- /* is deletion compaction */ true, CompactionReason::kFIFOTtl);
119
+ /* is deletion compaction */ true, /* l0_files_might_overlap */ true,
120
+ CompactionReason::kFIFOTtl);
120
121
  return c;
121
122
  }
122
123
 
@@ -160,6 +161,7 @@ Compaction* FIFOCompactionPicker::PickSizeCompaction(
160
161
  0 /* max_subcompactions */, {}, /* is manual */ false,
161
162
  /* trim_ts */ "", vstorage->CompactionScore(0),
162
163
  /* is deletion compaction */ false,
164
+ /* l0_files_might_overlap */ true,
163
165
  CompactionReason::kFIFOReduceNumFiles);
164
166
  return c;
165
167
  }
@@ -209,7 +211,8 @@ Compaction* FIFOCompactionPicker::PickSizeCompaction(
209
211
  mutable_cf_options.compression_opts, Temperature::kUnknown,
210
212
  /* max_subcompactions */ 0, {}, /* is manual */ false,
211
213
  /* trim_ts */ "", vstorage->CompactionScore(0),
212
- /* is deletion compaction */ true, CompactionReason::kFIFOMaxSize);
214
+ /* is deletion compaction */ true,
215
+ /* l0_files_might_overlap */ true, CompactionReason::kFIFOMaxSize);
213
216
  return c;
214
217
  }
215
218
 
@@ -315,7 +318,8 @@ Compaction* FIFOCompactionPicker::PickCompactionToWarm(
315
318
  Temperature::kWarm,
316
319
  /* max_subcompactions */ 0, {}, /* is manual */ false, /* trim_ts */ "",
317
320
  vstorage->CompactionScore(0),
318
- /* is deletion compaction */ false, CompactionReason::kChangeTemperature);
321
+ /* is deletion compaction */ false, /* l0_files_might_overlap */ true,
322
+ CompactionReason::kChangeTemperature);
319
323
  return c;
320
324
  }
321
325