@nxtedition/rocksdb 8.0.1 → 8.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/deps/rocksdb/rocksdb/CMakeLists.txt +2 -1
- package/deps/rocksdb/rocksdb/Makefile +2 -2
- package/deps/rocksdb/rocksdb/TARGETS +4 -2
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +0 -5
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +8 -29
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +146 -0
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +13 -1
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +20 -146
- package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +32 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +11 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +11 -9
- package/deps/rocksdb/rocksdb/db/column_family.h +20 -0
- package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +5 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +13 -33
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +5 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +27 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +4 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +8 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +65 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +5 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +10 -32
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +28 -47
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +28 -22
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -14
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +5 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +170 -140
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +5 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +8 -2
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +8 -0
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +266 -138
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +86 -1
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +72 -5
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +119 -10
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +585 -264
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +46 -18
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +5 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +6 -15
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +8 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +10 -0
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +250 -2
- package/deps/rocksdb/rocksdb/db/db_test.cc +3 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +307 -8
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +129 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +21 -0
- package/deps/rocksdb/rocksdb/db/dbformat.cc +25 -0
- package/deps/rocksdb/rocksdb/db/dbformat.h +2 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +1 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +5 -2
- package/deps/rocksdb/rocksdb/db/flush_job.cc +5 -2
- package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +4 -0
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +56 -53
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +3 -4
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +4 -0
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +10 -10
- package/deps/rocksdb/rocksdb/db/repair.cc +64 -22
- package/deps/rocksdb/rocksdb/db/repair_test.cc +54 -0
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +26 -26
- package/deps/rocksdb/rocksdb/db/table_cache.cc +2 -0
- package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -1
- package/deps/rocksdb/rocksdb/db/version_builder.cc +90 -43
- package/deps/rocksdb/rocksdb/db/version_builder.h +20 -0
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +190 -67
- package/deps/rocksdb/rocksdb/db/version_edit.cc +15 -1
- package/deps/rocksdb/rocksdb/db/version_edit.h +16 -4
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +41 -11
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +27 -12
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +18 -16
- package/deps/rocksdb/rocksdb/db/version_set.cc +212 -35
- package/deps/rocksdb/rocksdb/db/version_set.h +34 -4
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +45 -25
- package/deps/rocksdb/rocksdb/db/write_thread.cc +5 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +0 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +0 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +12 -17
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +6 -4
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +1 -0
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +0 -48
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +196 -171
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +9 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +25 -18
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +27 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
- package/deps/rocksdb/rocksdb/logging/logging.h +13 -19
- package/deps/rocksdb/rocksdb/memory/arena.cc +4 -3
- package/deps/rocksdb/rocksdb/memory/arena_test.cc +30 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +3 -1
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +26 -26
- package/deps/rocksdb/rocksdb/src.mk +2 -1
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +3 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -3
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +142 -0
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +241 -0
- package/deps/rocksdb/rocksdb/table/format.cc +24 -20
- package/deps/rocksdb/rocksdb/table/format.h +5 -2
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +97 -115
- package/deps/rocksdb/rocksdb/table/merging_iterator.h +82 -1
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +2 -2
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
- package/deps/rocksdb/rocksdb/table/table_test.cc +7 -6
- package/deps/rocksdb/rocksdb/test_util/testutil.h +10 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +0 -6
- package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +2 -2
- package/deps/rocksdb/rocksdb/util/bloom_test.cc +1 -1
- package/deps/rocksdb/rocksdb/util/status.cc +7 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +5 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -0
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +7 -67
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -3
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +59 -0
- package/deps/rocksdb/rocksdb.gyp +2 -1
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +0 -580
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +0 -476
|
@@ -565,7 +565,8 @@ ColumnFamilyData::ColumnFamilyData(
|
|
|
565
565
|
allow_2pc_(db_options.allow_2pc),
|
|
566
566
|
last_memtable_id_(0),
|
|
567
567
|
db_paths_registered_(false),
|
|
568
|
-
mempurge_used_(false)
|
|
568
|
+
mempurge_used_(false),
|
|
569
|
+
next_epoch_number_(1) {
|
|
569
570
|
if (id_ != kDummyColumnFamilyDataId) {
|
|
570
571
|
// TODO(cc): RegisterDbPaths can be expensive, considering moving it
|
|
571
572
|
// outside of this constructor which might be called with db mutex held.
|
|
@@ -1128,12 +1129,9 @@ bool ColumnFamilyData::NeedsCompaction() const {
|
|
|
1128
1129
|
Compaction* ColumnFamilyData::PickCompaction(
|
|
1129
1130
|
const MutableCFOptions& mutable_options,
|
|
1130
1131
|
const MutableDBOptions& mutable_db_options, LogBuffer* log_buffer) {
|
|
1131
|
-
SequenceNumber earliest_mem_seqno =
|
|
1132
|
-
std::min(mem_->GetEarliestSequenceNumber(),
|
|
1133
|
-
imm_.current()->GetEarliestSequenceNumber(false));
|
|
1134
1132
|
auto* result = compaction_picker_->PickCompaction(
|
|
1135
1133
|
GetName(), mutable_options, mutable_db_options, current_->storage_info(),
|
|
1136
|
-
log_buffer
|
|
1134
|
+
log_buffer);
|
|
1137
1135
|
if (result != nullptr) {
|
|
1138
1136
|
result->SetInputVersion(current_);
|
|
1139
1137
|
}
|
|
@@ -1212,14 +1210,11 @@ Compaction* ColumnFamilyData::CompactRange(
|
|
|
1212
1210
|
const InternalKey* begin, const InternalKey* end,
|
|
1213
1211
|
InternalKey** compaction_end, bool* conflict,
|
|
1214
1212
|
uint64_t max_file_num_to_ignore, const std::string& trim_ts) {
|
|
1215
|
-
SequenceNumber earliest_mem_seqno =
|
|
1216
|
-
std::min(mem_->GetEarliestSequenceNumber(),
|
|
1217
|
-
imm_.current()->GetEarliestSequenceNumber(false));
|
|
1218
1213
|
auto* result = compaction_picker_->CompactRange(
|
|
1219
1214
|
GetName(), mutable_cf_options, mutable_db_options,
|
|
1220
1215
|
current_->storage_info(), input_level, output_level,
|
|
1221
1216
|
compact_range_options, begin, end, compaction_end, conflict,
|
|
1222
|
-
max_file_num_to_ignore, trim_ts
|
|
1217
|
+
max_file_num_to_ignore, trim_ts);
|
|
1223
1218
|
if (result != nullptr) {
|
|
1224
1219
|
result->SetInputVersion(current_);
|
|
1225
1220
|
}
|
|
@@ -1523,6 +1518,13 @@ FSDirectory* ColumnFamilyData::GetDataDir(size_t path_id) const {
|
|
|
1523
1518
|
return data_dirs_[path_id].get();
|
|
1524
1519
|
}
|
|
1525
1520
|
|
|
1521
|
+
void ColumnFamilyData::RecoverEpochNumbers() {
|
|
1522
|
+
assert(current_);
|
|
1523
|
+
auto* vstorage = current_->storage_info();
|
|
1524
|
+
assert(vstorage);
|
|
1525
|
+
vstorage->RecoverEpochNumbers(this);
|
|
1526
|
+
}
|
|
1527
|
+
|
|
1526
1528
|
ColumnFamilySet::ColumnFamilySet(const std::string& dbname,
|
|
1527
1529
|
const ImmutableDBOptions* db_options,
|
|
1528
1530
|
const FileOptions& file_options,
|
|
@@ -533,6 +533,24 @@ class ColumnFamilyData {
|
|
|
533
533
|
void SetMempurgeUsed() { mempurge_used_ = true; }
|
|
534
534
|
bool GetMempurgeUsed() { return mempurge_used_; }
|
|
535
535
|
|
|
536
|
+
// Allocate and return a new epoch number
|
|
537
|
+
uint64_t NewEpochNumber() { return next_epoch_number_.fetch_add(1); }
|
|
538
|
+
|
|
539
|
+
// Get the next epoch number to be assigned
|
|
540
|
+
uint64_t GetNextEpochNumber() const { return next_epoch_number_.load(); }
|
|
541
|
+
|
|
542
|
+
// Set the next epoch number to be assigned
|
|
543
|
+
void SetNextEpochNumber(uint64_t next_epoch_number) {
|
|
544
|
+
next_epoch_number_.store(next_epoch_number);
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
// Reset the next epoch number to be assigned
|
|
548
|
+
void ResetNextEpochNumber() { next_epoch_number_.store(1); }
|
|
549
|
+
|
|
550
|
+
// Recover the next epoch number of this CF and epoch number
|
|
551
|
+
// of its files (if missing)
|
|
552
|
+
void RecoverEpochNumbers();
|
|
553
|
+
|
|
536
554
|
private:
|
|
537
555
|
friend class ColumnFamilySet;
|
|
538
556
|
ColumnFamilyData(uint32_t id, const std::string& name,
|
|
@@ -634,6 +652,8 @@ class ColumnFamilyData {
|
|
|
634
652
|
// a Version associated with this CFD
|
|
635
653
|
std::shared_ptr<CacheReservationManager> file_metadata_cache_res_mgr_;
|
|
636
654
|
bool mempurge_used_;
|
|
655
|
+
|
|
656
|
+
std::atomic<uint64_t> next_epoch_number_;
|
|
637
657
|
};
|
|
638
658
|
|
|
639
659
|
// ColumnFamilySet has interesting thread-safety requirements
|
|
@@ -188,6 +188,11 @@ class ClippingIterator : public InternalIterator {
|
|
|
188
188
|
return iter_->GetProperty(prop_name, prop);
|
|
189
189
|
}
|
|
190
190
|
|
|
191
|
+
bool IsDeleteRangeSentinelKey() const override {
|
|
192
|
+
assert(valid_);
|
|
193
|
+
return iter_->IsDeleteRangeSentinelKey();
|
|
194
|
+
}
|
|
195
|
+
|
|
191
196
|
private:
|
|
192
197
|
void UpdateValid() {
|
|
193
198
|
assert(!iter_->Valid() || iter_->status().ok());
|
|
@@ -20,9 +20,6 @@
|
|
|
20
20
|
|
|
21
21
|
namespace ROCKSDB_NAMESPACE {
|
|
22
22
|
|
|
23
|
-
const uint64_t kRangeTombstoneSentinel =
|
|
24
|
-
PackSequenceAndType(kMaxSequenceNumber, kTypeRangeDeletion);
|
|
25
|
-
|
|
26
23
|
int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a,
|
|
27
24
|
const InternalKey& b) {
|
|
28
25
|
auto c = user_cmp->CompareWithoutTimestamp(a.user_key(), b.user_key());
|
|
@@ -332,6 +329,7 @@ void Compaction::PopulatePenultimateLevelOutputRange() {
|
|
|
332
329
|
// the case that the penultimate level is empty).
|
|
333
330
|
if (immutable_options_.compaction_style == kCompactionStyleUniversal) {
|
|
334
331
|
exclude_level = kInvalidLevel;
|
|
332
|
+
penultimate_output_range_type_ = PenultimateOutputRangeType::kFullRange;
|
|
335
333
|
std::set<uint64_t> penultimate_inputs;
|
|
336
334
|
for (const auto& input_lvl : inputs_) {
|
|
337
335
|
if (input_lvl.level == penultimate_level_) {
|
|
@@ -345,7 +343,8 @@ void Compaction::PopulatePenultimateLevelOutputRange() {
|
|
|
345
343
|
if (penultimate_inputs.find(file->fd.GetNumber()) ==
|
|
346
344
|
penultimate_inputs.end()) {
|
|
347
345
|
exclude_level = number_levels_ - 1;
|
|
348
|
-
penultimate_output_range_type_ =
|
|
346
|
+
penultimate_output_range_type_ =
|
|
347
|
+
PenultimateOutputRangeType::kNonLastRange;
|
|
349
348
|
break;
|
|
350
349
|
}
|
|
351
350
|
}
|
|
@@ -354,35 +353,6 @@ void Compaction::PopulatePenultimateLevelOutputRange() {
|
|
|
354
353
|
GetBoundaryKeys(input_vstorage_, inputs_,
|
|
355
354
|
&penultimate_level_smallest_user_key_,
|
|
356
355
|
&penultimate_level_largest_user_key_, exclude_level);
|
|
357
|
-
|
|
358
|
-
// If there's a case that the penultimate level output range is overlapping
|
|
359
|
-
// with the existing files, disable the penultimate level output by setting
|
|
360
|
-
// the range to empty. One example is the range delete could have overlap
|
|
361
|
-
// boundary with the next file. (which is actually a false overlap)
|
|
362
|
-
// TODO: Exclude such false overlap, so it won't disable the penultimate
|
|
363
|
-
// output.
|
|
364
|
-
std::set<uint64_t> penultimate_inputs;
|
|
365
|
-
for (const auto& input_lvl : inputs_) {
|
|
366
|
-
if (input_lvl.level == penultimate_level_) {
|
|
367
|
-
for (const auto& file : input_lvl.files) {
|
|
368
|
-
penultimate_inputs.emplace(file->fd.GetNumber());
|
|
369
|
-
}
|
|
370
|
-
}
|
|
371
|
-
}
|
|
372
|
-
|
|
373
|
-
auto penultimate_files = input_vstorage_->LevelFiles(penultimate_level_);
|
|
374
|
-
for (const auto& file : penultimate_files) {
|
|
375
|
-
if (penultimate_inputs.find(file->fd.GetNumber()) ==
|
|
376
|
-
penultimate_inputs.end() &&
|
|
377
|
-
OverlapPenultimateLevelOutputRange(file->smallest.user_key(),
|
|
378
|
-
file->largest.user_key())) {
|
|
379
|
-
// basically disable the penultimate range output. which should be rare
|
|
380
|
-
// or a false overlap caused by range del
|
|
381
|
-
penultimate_level_smallest_user_key_ = "";
|
|
382
|
-
penultimate_level_largest_user_key_ = "";
|
|
383
|
-
penultimate_output_range_type_ = PenultimateOutputRangeType::kDisabled;
|
|
384
|
-
}
|
|
385
|
-
}
|
|
386
356
|
}
|
|
387
357
|
|
|
388
358
|
Compaction::~Compaction() {
|
|
@@ -807,6 +777,16 @@ uint64_t Compaction::MinInputFileOldestAncesterTime(
|
|
|
807
777
|
return min_oldest_ancester_time;
|
|
808
778
|
}
|
|
809
779
|
|
|
780
|
+
uint64_t Compaction::MinInputFileEpochNumber() const {
|
|
781
|
+
uint64_t min_epoch_number = std::numeric_limits<uint64_t>::max();
|
|
782
|
+
for (const auto& inputs_per_level : inputs_) {
|
|
783
|
+
for (const auto& file : inputs_per_level.files) {
|
|
784
|
+
min_epoch_number = std::min(min_epoch_number, file->epoch_number);
|
|
785
|
+
}
|
|
786
|
+
}
|
|
787
|
+
return min_epoch_number;
|
|
788
|
+
}
|
|
789
|
+
|
|
810
790
|
int Compaction::EvaluatePenultimateLevel(
|
|
811
791
|
const VersionStorageInfo* vstorage,
|
|
812
792
|
const ImmutableOptions& immutable_options, const int start_level,
|
|
@@ -18,6 +18,8 @@ namespace ROCKSDB_NAMESPACE {
|
|
|
18
18
|
// The file contains class Compaction, as well as some helper functions
|
|
19
19
|
// and data structures used by the class.
|
|
20
20
|
|
|
21
|
+
const uint64_t kRangeTombstoneSentinel =
|
|
22
|
+
PackSequenceAndType(kMaxSequenceNumber, kTypeRangeDeletion);
|
|
21
23
|
// Utility for comparing sstable boundary keys. Returns -1 if either a or b is
|
|
22
24
|
// null which provides the property that a==null indicates a key that is less
|
|
23
25
|
// than any key and b==null indicates a key that is greater than any key. Note
|
|
@@ -378,6 +380,9 @@ class Compaction {
|
|
|
378
380
|
// This is used to filter out some input files' ancester's time range.
|
|
379
381
|
uint64_t MinInputFileOldestAncesterTime(const InternalKey* start,
|
|
380
382
|
const InternalKey* end) const;
|
|
383
|
+
// Return the minimum epoch number among
|
|
384
|
+
// input files' associated with this compaction
|
|
385
|
+
uint64_t MinInputFileEpochNumber() const;
|
|
381
386
|
|
|
382
387
|
// Called by DBImpl::NotifyOnCompactionCompleted to make sure number of
|
|
383
388
|
// compaction begin and compaction completion callbacks match.
|
|
@@ -377,6 +377,7 @@ void CompactionIterator::NextFromInput() {
|
|
|
377
377
|
value_ = input_.value();
|
|
378
378
|
blob_value_.Reset();
|
|
379
379
|
iter_stats_.num_input_records++;
|
|
380
|
+
is_range_del_ = input_.IsDeleteRangeSentinelKey();
|
|
380
381
|
|
|
381
382
|
Status pik_status = ParseInternalKey(key_, &ikey_, allow_data_in_errors_);
|
|
382
383
|
if (!pik_status.ok()) {
|
|
@@ -396,7 +397,10 @@ void CompactionIterator::NextFromInput() {
|
|
|
396
397
|
break;
|
|
397
398
|
}
|
|
398
399
|
TEST_SYNC_POINT_CALLBACK("CompactionIterator:ProcessKV", &ikey_);
|
|
399
|
-
|
|
400
|
+
if (is_range_del_) {
|
|
401
|
+
validity_info_.SetValid(kRangeDeletion);
|
|
402
|
+
break;
|
|
403
|
+
}
|
|
400
404
|
// Update input statistics
|
|
401
405
|
if (ikey_.type == kTypeDeletion || ikey_.type == kTypeSingleDeletion ||
|
|
402
406
|
ikey_.type == kTypeDeletionWithTimestamp) {
|
|
@@ -618,6 +622,14 @@ void CompactionIterator::NextFromInput() {
|
|
|
618
622
|
|
|
619
623
|
ParsedInternalKey next_ikey;
|
|
620
624
|
AdvanceInputIter();
|
|
625
|
+
while (input_.Valid() && input_.IsDeleteRangeSentinelKey() &&
|
|
626
|
+
ParseInternalKey(input_.key(), &next_ikey, allow_data_in_errors_)
|
|
627
|
+
.ok() &&
|
|
628
|
+
cmp_->EqualWithoutTimestamp(ikey_.user_key, next_ikey.user_key)) {
|
|
629
|
+
// skip range tombstone start keys with the same user key
|
|
630
|
+
// since they are not "real" point keys.
|
|
631
|
+
AdvanceInputIter();
|
|
632
|
+
}
|
|
621
633
|
|
|
622
634
|
// Check whether the next key exists, is not corrupt, and is the same key
|
|
623
635
|
// as the single delete.
|
|
@@ -625,6 +637,7 @@ void CompactionIterator::NextFromInput() {
|
|
|
625
637
|
ParseInternalKey(input_.key(), &next_ikey, allow_data_in_errors_)
|
|
626
638
|
.ok() &&
|
|
627
639
|
cmp_->EqualWithoutTimestamp(ikey_.user_key, next_ikey.user_key)) {
|
|
640
|
+
assert(!input_.IsDeleteRangeSentinelKey());
|
|
628
641
|
#ifndef NDEBUG
|
|
629
642
|
const Compaction* c =
|
|
630
643
|
compaction_ ? compaction_->real_compaction() : nullptr;
|
|
@@ -849,12 +862,14 @@ void CompactionIterator::NextFromInput() {
|
|
|
849
862
|
// Note that a deletion marker of type kTypeDeletionWithTimestamp will be
|
|
850
863
|
// considered to have a different user key unless the timestamp is older
|
|
851
864
|
// than *full_history_ts_low_.
|
|
865
|
+
//
|
|
866
|
+
// Range tombstone start keys are skipped as they are not "real" keys.
|
|
852
867
|
while (!IsPausingManualCompaction() && !IsShuttingDown() &&
|
|
853
868
|
input_.Valid() &&
|
|
854
869
|
(ParseInternalKey(input_.key(), &next_ikey, allow_data_in_errors_)
|
|
855
870
|
.ok()) &&
|
|
856
871
|
cmp_->EqualWithoutTimestamp(ikey_.user_key, next_ikey.user_key) &&
|
|
857
|
-
(prev_snapshot == 0 ||
|
|
872
|
+
(prev_snapshot == 0 || input_.IsDeleteRangeSentinelKey() ||
|
|
858
873
|
DefinitelyNotInSnapshot(next_ikey.sequence, prev_snapshot))) {
|
|
859
874
|
AdvanceInputIter();
|
|
860
875
|
}
|
|
@@ -1105,7 +1120,9 @@ void CompactionIterator::DecideOutputLevel() {
|
|
|
1105
1120
|
TEST_SYNC_POINT_CALLBACK("CompactionIterator::PrepareOutput.context",
|
|
1106
1121
|
&context);
|
|
1107
1122
|
output_to_penultimate_level_ = context.output_to_penultimate_level;
|
|
1108
|
-
#
|
|
1123
|
+
#else
|
|
1124
|
+
output_to_penultimate_level_ = false;
|
|
1125
|
+
#endif // NDEBUG
|
|
1109
1126
|
|
|
1110
1127
|
// if the key is newer than the cutoff sequence or within the earliest
|
|
1111
1128
|
// snapshot, it should output to the penultimate level.
|
|
@@ -1145,10 +1162,12 @@ void CompactionIterator::DecideOutputLevel() {
|
|
|
1145
1162
|
|
|
1146
1163
|
void CompactionIterator::PrepareOutput() {
|
|
1147
1164
|
if (Valid()) {
|
|
1148
|
-
if (
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1165
|
+
if (LIKELY(!is_range_del_)) {
|
|
1166
|
+
if (ikey_.type == kTypeValue) {
|
|
1167
|
+
ExtractLargeValueIfNeeded();
|
|
1168
|
+
} else if (ikey_.type == kTypeBlobIndex) {
|
|
1169
|
+
GarbageCollectBlobIfNeeded();
|
|
1170
|
+
}
|
|
1152
1171
|
}
|
|
1153
1172
|
|
|
1154
1173
|
if (compaction_ != nullptr && compaction_->SupportsPerKeyPlacement()) {
|
|
@@ -1171,7 +1190,7 @@ void CompactionIterator::PrepareOutput() {
|
|
|
1171
1190
|
DefinitelyInSnapshot(ikey_.sequence, earliest_snapshot_) &&
|
|
1172
1191
|
ikey_.type != kTypeMerge && current_key_committed_ &&
|
|
1173
1192
|
!output_to_penultimate_level_ &&
|
|
1174
|
-
ikey_.sequence < preserve_time_min_seqno_) {
|
|
1193
|
+
ikey_.sequence < preserve_time_min_seqno_ && !is_range_del_) {
|
|
1175
1194
|
if (ikey_.type == kTypeDeletion ||
|
|
1176
1195
|
(ikey_.type == kTypeSingleDeletion && timestamp_size_ == 0)) {
|
|
1177
1196
|
ROCKS_LOG_FATAL(
|
|
@@ -63,6 +63,10 @@ class SequenceIterWrapper : public InternalIterator {
|
|
|
63
63
|
void SeekToLast() override { assert(false); }
|
|
64
64
|
|
|
65
65
|
uint64_t num_itered() const { return num_itered_; }
|
|
66
|
+
bool IsDeleteRangeSentinelKey() const override {
|
|
67
|
+
assert(Valid());
|
|
68
|
+
return inner_iter_->IsDeleteRangeSentinelKey();
|
|
69
|
+
}
|
|
66
70
|
|
|
67
71
|
private:
|
|
68
72
|
InternalKeyComparator icmp_;
|
|
@@ -242,7 +246,12 @@ class CompactionIterator {
|
|
|
242
246
|
const Status& status() const { return status_; }
|
|
243
247
|
const ParsedInternalKey& ikey() const { return ikey_; }
|
|
244
248
|
inline bool Valid() const { return validity_info_.IsValid(); }
|
|
245
|
-
const Slice& user_key() const {
|
|
249
|
+
const Slice& user_key() const {
|
|
250
|
+
if (UNLIKELY(is_range_del_)) {
|
|
251
|
+
return ikey_.user_key;
|
|
252
|
+
}
|
|
253
|
+
return current_user_key_;
|
|
254
|
+
}
|
|
246
255
|
const CompactionIterationStats& iter_stats() const { return iter_stats_; }
|
|
247
256
|
uint64_t num_input_entry_scanned() const { return input_.num_itered(); }
|
|
248
257
|
// If the current key should be placed on penultimate level, only valid if
|
|
@@ -252,6 +261,8 @@ class CompactionIterator {
|
|
|
252
261
|
}
|
|
253
262
|
Status InputStatus() const { return input_.status(); }
|
|
254
263
|
|
|
264
|
+
bool IsDeleteRangeSentinelKey() const { return is_range_del_; }
|
|
265
|
+
|
|
255
266
|
private:
|
|
256
267
|
// Processes the input stream to find the next output
|
|
257
268
|
void NextFromInput();
|
|
@@ -385,6 +396,7 @@ class CompactionIterator {
|
|
|
385
396
|
kKeepSD = 8,
|
|
386
397
|
kKeepDel = 9,
|
|
387
398
|
kNewUserKey = 10,
|
|
399
|
+
kRangeDeletion = 11,
|
|
388
400
|
};
|
|
389
401
|
|
|
390
402
|
struct ValidityInfo {
|
|
@@ -492,6 +504,10 @@ class CompactionIterator {
|
|
|
492
504
|
// This is a best-effort facility, so memory_order_relaxed is sufficient.
|
|
493
505
|
return manual_compaction_canceled_.load(std::memory_order_relaxed);
|
|
494
506
|
}
|
|
507
|
+
|
|
508
|
+
// Stores whether the current compaction iterator output
|
|
509
|
+
// is a range tombstone start key.
|
|
510
|
+
bool is_range_del_{false};
|
|
495
511
|
};
|
|
496
512
|
|
|
497
513
|
inline bool CompactionIterator::DefinitelyInSnapshot(SequenceNumber seq,
|
|
@@ -1286,7 +1286,6 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
|
|
1286
1286
|
while (status.ok() && !cfd->IsDropped() && c_iter->Valid()) {
|
|
1287
1287
|
// Invariant: c_iter.status() is guaranteed to be OK if c_iter->Valid()
|
|
1288
1288
|
// returns true.
|
|
1289
|
-
|
|
1290
1289
|
assert(!end.has_value() || cfd->user_comparator()->Compare(
|
|
1291
1290
|
c_iter->user_key(), end.value()) < 0);
|
|
1292
1291
|
|
|
@@ -1834,12 +1833,14 @@ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact,
|
|
|
1834
1833
|
}
|
|
1835
1834
|
|
|
1836
1835
|
// Initialize a SubcompactionState::Output and add it to sub_compact->outputs
|
|
1836
|
+
uint64_t epoch_number = sub_compact->compaction->MinInputFileEpochNumber();
|
|
1837
1837
|
{
|
|
1838
1838
|
FileMetaData meta;
|
|
1839
1839
|
meta.fd = FileDescriptor(file_number,
|
|
1840
1840
|
sub_compact->compaction->output_path_id(), 0);
|
|
1841
1841
|
meta.oldest_ancester_time = oldest_ancester_time;
|
|
1842
1842
|
meta.file_creation_time = current_time;
|
|
1843
|
+
meta.epoch_number = epoch_number;
|
|
1843
1844
|
meta.temperature = temperature;
|
|
1844
1845
|
assert(!db_id_.empty());
|
|
1845
1846
|
assert(!db_session_id_.empty());
|
|
@@ -402,6 +402,7 @@ struct CompactionServiceOutputFile {
|
|
|
402
402
|
std::string largest_internal_key;
|
|
403
403
|
uint64_t oldest_ancester_time;
|
|
404
404
|
uint64_t file_creation_time;
|
|
405
|
+
uint64_t epoch_number;
|
|
405
406
|
uint64_t paranoid_hash;
|
|
406
407
|
bool marked_for_compaction;
|
|
407
408
|
UniqueId64x2 unique_id;
|
|
@@ -411,8 +412,8 @@ struct CompactionServiceOutputFile {
|
|
|
411
412
|
const std::string& name, SequenceNumber smallest, SequenceNumber largest,
|
|
412
413
|
std::string _smallest_internal_key, std::string _largest_internal_key,
|
|
413
414
|
uint64_t _oldest_ancester_time, uint64_t _file_creation_time,
|
|
414
|
-
uint64_t
|
|
415
|
-
UniqueId64x2 _unique_id)
|
|
415
|
+
uint64_t _epoch_number, uint64_t _paranoid_hash,
|
|
416
|
+
bool _marked_for_compaction, UniqueId64x2 _unique_id)
|
|
416
417
|
: file_name(name),
|
|
417
418
|
smallest_seqno(smallest),
|
|
418
419
|
largest_seqno(largest),
|
|
@@ -420,6 +421,7 @@ struct CompactionServiceOutputFile {
|
|
|
420
421
|
largest_internal_key(std::move(_largest_internal_key)),
|
|
421
422
|
oldest_ancester_time(_oldest_ancester_time),
|
|
422
423
|
file_creation_time(_file_creation_time),
|
|
424
|
+
epoch_number(_epoch_number),
|
|
423
425
|
paranoid_hash(_paranoid_hash),
|
|
424
426
|
marked_for_compaction(_marked_for_compaction),
|
|
425
427
|
unique_id(std::move(_unique_id)) {}
|
|
@@ -380,11 +380,13 @@ class CompactionJobTestBase : public testing::Test {
|
|
|
380
380
|
}
|
|
381
381
|
|
|
382
382
|
VersionEdit edit;
|
|
383
|
-
edit.AddFile(
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
383
|
+
edit.AddFile(
|
|
384
|
+
level, file_number, 0, file_size, smallest_key, largest_key,
|
|
385
|
+
smallest_seqno, largest_seqno, false, Temperature::kUnknown,
|
|
386
|
+
oldest_blob_file_number, kUnknownOldestAncesterTime,
|
|
387
|
+
kUnknownFileCreationTime,
|
|
388
|
+
versions_->GetColumnFamilySet()->GetDefault()->NewEpochNumber(),
|
|
389
|
+
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
|
388
390
|
|
|
389
391
|
mutex_.Lock();
|
|
390
392
|
EXPECT_OK(
|
|
@@ -1655,7 +1657,7 @@ TEST_F(CompactionJobTest, ResultSerialization) {
|
|
|
1655
1657
|
rnd.RandomBinaryString(rnd.Uniform(kStrMaxLen)),
|
|
1656
1658
|
rnd.RandomBinaryString(rnd.Uniform(kStrMaxLen)),
|
|
1657
1659
|
rnd64.Uniform(UINT64_MAX), rnd64.Uniform(UINT64_MAX),
|
|
1658
|
-
rnd64.Uniform(UINT64_MAX), rnd.OneIn(2), id);
|
|
1660
|
+
rnd64.Uniform(UINT64_MAX), rnd64.Uniform(UINT64_MAX), rnd.OneIn(2), id);
|
|
1659
1661
|
}
|
|
1660
1662
|
result.output_level = rnd.Uniform(10);
|
|
1661
1663
|
result.output_path = rnd.RandomString(rnd.Uniform(kStrMaxLen));
|
|
@@ -333,8 +333,14 @@ Status CompactionOutputs::AddToOutput(
|
|
|
333
333
|
const CompactionFileOpenFunc& open_file_func,
|
|
334
334
|
const CompactionFileCloseFunc& close_file_func) {
|
|
335
335
|
Status s;
|
|
336
|
+
bool is_range_del = c_iter.IsDeleteRangeSentinelKey();
|
|
337
|
+
if (is_range_del && compaction_->bottommost_level()) {
|
|
338
|
+
// We don't consider range tombstone for bottommost level since:
|
|
339
|
+
// 1. there is no grandparent and hence no overlap to consider
|
|
340
|
+
// 2. range tombstone may be dropped at bottommost level.
|
|
341
|
+
return s;
|
|
342
|
+
}
|
|
336
343
|
const Slice& key = c_iter.key();
|
|
337
|
-
|
|
338
344
|
if (ShouldStopBefore(c_iter) && HasBuilder()) {
|
|
339
345
|
s = close_file_func(*this, c_iter.InputStatus(), key);
|
|
340
346
|
if (!s.ok()) {
|
|
@@ -344,6 +350,13 @@ Status CompactionOutputs::AddToOutput(
|
|
|
344
350
|
grandparent_boundary_switched_num_ = 0;
|
|
345
351
|
grandparent_overlapped_bytes_ =
|
|
346
352
|
GetCurrentKeyGrandparentOverlappedBytes(key);
|
|
353
|
+
if (UNLIKELY(is_range_del)) {
|
|
354
|
+
// lower bound for this new output file, this is needed as the lower bound
|
|
355
|
+
// does not come from the smallest point key in this case.
|
|
356
|
+
range_tombstone_lower_bound_.DecodeFrom(key);
|
|
357
|
+
} else {
|
|
358
|
+
range_tombstone_lower_bound_.Clear();
|
|
359
|
+
}
|
|
347
360
|
}
|
|
348
361
|
|
|
349
362
|
// Open output file if necessary
|
|
@@ -354,6 +367,17 @@ Status CompactionOutputs::AddToOutput(
|
|
|
354
367
|
}
|
|
355
368
|
}
|
|
356
369
|
|
|
370
|
+
// c_iter may emit range deletion keys, so update `last_key_for_partitioner_`
|
|
371
|
+
// here before returning below when `is_range_del` is true
|
|
372
|
+
if (partitioner_) {
|
|
373
|
+
last_key_for_partitioner_.assign(c_iter.user_key().data_,
|
|
374
|
+
c_iter.user_key().size_);
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
if (UNLIKELY(is_range_del)) {
|
|
378
|
+
return s;
|
|
379
|
+
}
|
|
380
|
+
|
|
357
381
|
assert(builder_ != nullptr);
|
|
358
382
|
const Slice& value = c_iter.value();
|
|
359
383
|
s = current_output().validator.Add(key, value);
|
|
@@ -377,11 +401,6 @@ Status CompactionOutputs::AddToOutput(
|
|
|
377
401
|
s = current_output().meta.UpdateBoundaries(key, value, ikey.sequence,
|
|
378
402
|
ikey.type);
|
|
379
403
|
|
|
380
|
-
if (partitioner_) {
|
|
381
|
-
last_key_for_partitioner_.assign(c_iter.user_key().data_,
|
|
382
|
-
c_iter.user_key().size_);
|
|
383
|
-
}
|
|
384
|
-
|
|
385
404
|
return s;
|
|
386
405
|
}
|
|
387
406
|
|
|
@@ -398,13 +417,19 @@ Status CompactionOutputs::AddRangeDels(
|
|
|
398
417
|
std::string smallest_user_key;
|
|
399
418
|
const Slice *lower_bound, *upper_bound;
|
|
400
419
|
bool lower_bound_from_sub_compact = false;
|
|
401
|
-
|
|
420
|
+
bool lower_bound_from_range_tombstone = false;
|
|
402
421
|
size_t output_size = outputs_.size();
|
|
403
422
|
if (output_size == 1) {
|
|
404
423
|
// For the first output table, include range tombstones before the min
|
|
405
424
|
// key but after the subcompaction boundary.
|
|
406
425
|
lower_bound = comp_start_user_key;
|
|
407
426
|
lower_bound_from_sub_compact = true;
|
|
427
|
+
} else if (range_tombstone_lower_bound_.size() > 0) {
|
|
428
|
+
assert(meta.smallest.size() == 0 ||
|
|
429
|
+
icmp.Compare(range_tombstone_lower_bound_, meta.smallest) <= 0);
|
|
430
|
+
lower_bound_guard = range_tombstone_lower_bound_.user_key();
|
|
431
|
+
lower_bound = &lower_bound_guard;
|
|
432
|
+
lower_bound_from_range_tombstone = true;
|
|
408
433
|
} else if (meta.smallest.size() > 0) {
|
|
409
434
|
// For subsequent output tables, only include range tombstones from min
|
|
410
435
|
// key onwards since the previous file was extended to contain range
|
|
@@ -532,6 +557,39 @@ Status CompactionOutputs::AddRangeDels(
|
|
|
532
557
|
smallest_candidate =
|
|
533
558
|
InternalKey(*lower_bound, tombstone.seq_, kTypeRangeDeletion);
|
|
534
559
|
}
|
|
560
|
+
} else if (lower_bound_from_range_tombstone) {
|
|
561
|
+
// Range tombstone keys can be truncated at file boundaries of the files
|
|
562
|
+
// that contain them.
|
|
563
|
+
//
|
|
564
|
+
// If this lower bound is from a range tombstone key that is not
|
|
565
|
+
// truncated, i.e., it was not truncated when reading from the input
|
|
566
|
+
// files, then its sequence number and `op_type` will be
|
|
567
|
+
// kMaxSequenceNumber and kTypeRangeDeletion (see
|
|
568
|
+
// TruncatedRangeDelIterator::start_key()). In this case, when this key
|
|
569
|
+
// was used as the upper bound to cut the previous compaction output
|
|
570
|
+
// file, the previous file's largest key could have the same value as
|
|
571
|
+
// this key (see the upperbound logic below). To guarantee
|
|
572
|
+
// non-overlapping ranges between output files, we use the range
|
|
573
|
+
// tombstone's actual sequence number (tombstone.seq_) for the lower
|
|
574
|
+
// bound of this file. If this range tombstone key is truncated, then
|
|
575
|
+
// the previous file's largest key will be smaller than this range
|
|
576
|
+
// tombstone key, so we can use it as the lower bound directly.
|
|
577
|
+
if (ExtractInternalKeyFooter(range_tombstone_lower_bound_.Encode()) ==
|
|
578
|
+
kRangeTombstoneSentinel) {
|
|
579
|
+
if (ts_sz) {
|
|
580
|
+
smallest_candidate =
|
|
581
|
+
InternalKey(range_tombstone_lower_bound_.user_key(),
|
|
582
|
+
tombstone.seq_, kTypeRangeDeletion, tombstone.ts_);
|
|
583
|
+
} else {
|
|
584
|
+
smallest_candidate =
|
|
585
|
+
InternalKey(range_tombstone_lower_bound_.user_key(),
|
|
586
|
+
tombstone.seq_, kTypeRangeDeletion);
|
|
587
|
+
}
|
|
588
|
+
} else {
|
|
589
|
+
assert(GetInternalKeySeqno(range_tombstone_lower_bound_.Encode()) <
|
|
590
|
+
kMaxSequenceNumber);
|
|
591
|
+
smallest_candidate = range_tombstone_lower_bound_;
|
|
592
|
+
}
|
|
535
593
|
} else {
|
|
536
594
|
smallest_candidate = InternalKey(*lower_bound, 0, kTypeRangeDeletion);
|
|
537
595
|
}
|
|
@@ -307,6 +307,7 @@ class CompactionOutputs {
|
|
|
307
307
|
std::unique_ptr<SstPartitioner> partitioner_;
|
|
308
308
|
|
|
309
309
|
// A flag determines if this subcompaction has been split by the cursor
|
|
310
|
+
// for RoundRobin compaction
|
|
310
311
|
bool is_split_ = false;
|
|
311
312
|
|
|
312
313
|
// We also maintain the output split key for each subcompaction to avoid
|
|
@@ -338,6 +339,10 @@ class CompactionOutputs {
|
|
|
338
339
|
// for the current output file, how many file boundaries has it crossed,
|
|
339
340
|
// basically number of files overlapped * 2
|
|
340
341
|
size_t grandparent_boundary_switched_num_ = 0;
|
|
342
|
+
|
|
343
|
+
// The smallest key of the current output file, this is set when current
|
|
344
|
+
// output file's smallest key is a range tombstone start key.
|
|
345
|
+
InternalKey range_tombstone_lower_bound_;
|
|
341
346
|
};
|
|
342
347
|
|
|
343
348
|
// helper struct to concatenate the last level and penultimate level outputs
|
|
@@ -31,27 +31,15 @@ bool FindIntraL0Compaction(const std::vector<FileMetaData*>& level_files,
|
|
|
31
31
|
size_t min_files_to_compact,
|
|
32
32
|
uint64_t max_compact_bytes_per_del_file,
|
|
33
33
|
uint64_t max_compaction_bytes,
|
|
34
|
-
CompactionInputFiles* comp_inputs
|
|
35
|
-
const SequenceNumber earliest_mem_seqno) {
|
|
36
|
-
// Do not pick ingested file when there is at least one memtable not flushed
|
|
37
|
-
// which of seqno is overlap with the sst.
|
|
34
|
+
CompactionInputFiles* comp_inputs) {
|
|
38
35
|
TEST_SYNC_POINT("FindIntraL0Compaction");
|
|
36
|
+
|
|
39
37
|
size_t start = 0;
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
return false;
|
|
43
|
-
}
|
|
44
|
-
// If there is no data in memtable, the earliest sequence number would the
|
|
45
|
-
// largest sequence number in last memtable.
|
|
46
|
-
// Because all files are sorted in descending order by largest_seqno, so we
|
|
47
|
-
// only need to check the first one.
|
|
48
|
-
if (level_files[start]->fd.largest_seqno <= earliest_mem_seqno) {
|
|
49
|
-
break;
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
|
-
if (start >= level_files.size()) {
|
|
38
|
+
|
|
39
|
+
if (level_files.size() == 0 || level_files[start]->being_compacted) {
|
|
53
40
|
return false;
|
|
54
41
|
}
|
|
42
|
+
|
|
55
43
|
size_t compact_bytes = static_cast<size_t>(level_files[start]->fd.file_size);
|
|
56
44
|
size_t compact_bytes_per_del_file = std::numeric_limits<size_t>::max();
|
|
57
45
|
// Compaction range will be [start, limit).
|
|
@@ -613,8 +601,7 @@ Compaction* CompactionPicker::CompactRange(
|
|
|
613
601
|
int input_level, int output_level,
|
|
614
602
|
const CompactRangeOptions& compact_range_options, const InternalKey* begin,
|
|
615
603
|
const InternalKey* end, InternalKey** compaction_end, bool* manual_conflict,
|
|
616
|
-
uint64_t max_file_num_to_ignore, const std::string& trim_ts
|
|
617
|
-
const SequenceNumber /*earliest_mem_seqno*/) {
|
|
604
|
+
uint64_t max_file_num_to_ignore, const std::string& trim_ts) {
|
|
618
605
|
// CompactionPickerFIFO has its own implementation of compact range
|
|
619
606
|
assert(ioptions_.compaction_style != kCompactionStyleFIFO);
|
|
620
607
|
|
|
@@ -919,8 +906,7 @@ bool HaveOverlappingKeyRanges(const Comparator* c, const SstFileMetaData& a,
|
|
|
919
906
|
|
|
920
907
|
Status CompactionPicker::SanitizeCompactionInputFilesForAllLevels(
|
|
921
908
|
std::unordered_set<uint64_t>* input_files,
|
|
922
|
-
const ColumnFamilyMetaData& cf_meta, const int output_level
|
|
923
|
-
const SequenceNumber earliest_mem_seqno) const {
|
|
909
|
+
const ColumnFamilyMetaData& cf_meta, const int output_level) const {
|
|
924
910
|
auto& levels = cf_meta.levels;
|
|
925
911
|
auto comparator = icmp_->user_comparator();
|
|
926
912
|
|
|
@@ -997,12 +983,6 @@ Status CompactionPicker::SanitizeCompactionInputFilesForAllLevels(
|
|
|
997
983
|
current_files[f].name +
|
|
998
984
|
" is currently being compacted.");
|
|
999
985
|
}
|
|
1000
|
-
if (output_level == 0 &&
|
|
1001
|
-
current_files[f].largest_seqno > earliest_mem_seqno) {
|
|
1002
|
-
return Status::Aborted(
|
|
1003
|
-
"Necessary compaction input file " + current_files[f].name +
|
|
1004
|
-
" has overlapping seqnos with earliest memtable seqnos.");
|
|
1005
|
-
}
|
|
1006
986
|
|
|
1007
987
|
input_files->insert(TableFileNameToNumber(current_files[f].name));
|
|
1008
988
|
}
|
|
@@ -1060,14 +1040,12 @@ Status CompactionPicker::SanitizeCompactionInputFilesForAllLevels(
|
|
|
1060
1040
|
"A running compaction is writing to the same output level in an "
|
|
1061
1041
|
"overlapping key range");
|
|
1062
1042
|
}
|
|
1063
|
-
|
|
1064
1043
|
return Status::OK();
|
|
1065
1044
|
}
|
|
1066
1045
|
|
|
1067
1046
|
Status CompactionPicker::SanitizeCompactionInputFiles(
|
|
1068
1047
|
std::unordered_set<uint64_t>* input_files,
|
|
1069
|
-
const ColumnFamilyMetaData& cf_meta, const int output_level
|
|
1070
|
-
const SequenceNumber earliest_mem_seqno) const {
|
|
1048
|
+
const ColumnFamilyMetaData& cf_meta, const int output_level) const {
|
|
1071
1049
|
assert(static_cast<int>(cf_meta.levels.size()) - 1 ==
|
|
1072
1050
|
cf_meta.levels[cf_meta.levels.size() - 1].level);
|
|
1073
1051
|
if (output_level >= static_cast<int>(cf_meta.levels.size())) {
|
|
@@ -1093,8 +1071,8 @@ Status CompactionPicker::SanitizeCompactionInputFiles(
|
|
|
1093
1071
|
"A compaction must contain at least one file.");
|
|
1094
1072
|
}
|
|
1095
1073
|
|
|
1096
|
-
Status s = SanitizeCompactionInputFilesForAllLevels(
|
|
1097
|
-
|
|
1074
|
+
Status s = SanitizeCompactionInputFilesForAllLevels(input_files, cf_meta,
|
|
1075
|
+
output_level);
|
|
1098
1076
|
|
|
1099
1077
|
if (!s.ok()) {
|
|
1100
1078
|
return s;
|