@nxtedition/rocksdb 7.0.23 → 7.0.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +3 -1
- package/deps/rocksdb/rocksdb/CMakeLists.txt +5 -0
- package/deps/rocksdb/rocksdb/Makefile +6 -2
- package/deps/rocksdb/rocksdb/TARGETS +14 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +4 -1
- package/deps/rocksdb/rocksdb/cache/cache_helpers.h +20 -0
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +2 -2
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +44 -31
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +491 -722
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +468 -2
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +51 -52
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +28 -16
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +12 -1
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +1 -0
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +170 -36
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +63 -36
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +4 -6
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +57 -38
- package/deps/rocksdb/rocksdb/db/blob/blob_read_request.h +58 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +164 -74
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +42 -29
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +419 -62
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +208 -8
- package/deps/rocksdb/rocksdb/db/c.cc +68 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +95 -2
- package/deps/rocksdb/rocksdb/db/column_family.cc +12 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +92 -15
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +76 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +52 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +30 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +126 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +203 -1584
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +93 -26
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +87 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +314 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +328 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +32 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +4 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +7 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +174 -33
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +474 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +825 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_state.cc +46 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_state.h +42 -0
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +223 -0
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +255 -0
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +1253 -0
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +32 -8
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +3 -1
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +13 -8
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +376 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +103 -78
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +4 -6
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +0 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +10 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +21 -6
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +19 -1
- package/deps/rocksdb/rocksdb/db/db_iter.cc +91 -14
- package/deps/rocksdb/rocksdb/db/db_iter.h +5 -0
- package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +33 -0
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +79 -0
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +5 -2
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +185 -0
- package/deps/rocksdb/rocksdb/db/dbformat.cc +1 -4
- package/deps/rocksdb/rocksdb/db/dbformat.h +2 -8
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +71 -29
- package/deps/rocksdb/rocksdb/db/internal_stats.h +160 -5
- package/deps/rocksdb/rocksdb/db/log_reader.cc +29 -3
- package/deps/rocksdb/rocksdb/db/log_reader.h +12 -3
- package/deps/rocksdb/rocksdb/db/repair_test.cc +1 -3
- package/deps/rocksdb/rocksdb/db/version_edit.cc +6 -0
- package/deps/rocksdb/rocksdb/db/version_set.cc +93 -129
- package/deps/rocksdb/rocksdb/db/version_set.h +4 -4
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +2 -2
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +42 -35
- package/deps/rocksdb/rocksdb/db/write_batch.cc +10 -2
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +4 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +10 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +3 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +3 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +5 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +140 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +12 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +46 -7
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +7 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +27 -7
- package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +8 -0
- package/deps/rocksdb/rocksdb/env/env_posix.cc +14 -0
- package/deps/rocksdb/rocksdb/env/env_test.cc +130 -1
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +7 -1
- package/deps/rocksdb/rocksdb/env/io_posix.cc +18 -50
- package/deps/rocksdb/rocksdb/env/io_posix.h +53 -6
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +8 -10
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +3 -7
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +239 -259
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +84 -19
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +24 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +31 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +11 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +20 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +37 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/threadpool.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +13 -13
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +12 -2
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +38 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +7 -1
- package/deps/rocksdb/rocksdb/port/win/env_win.cc +17 -0
- package/deps/rocksdb/rocksdb/port/win/env_win.h +8 -0
- package/deps/rocksdb/rocksdb/port/win/io_win.cc +6 -3
- package/deps/rocksdb/rocksdb/src.mk +5 -0
- package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +15 -12
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +5 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -4
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +1 -2
- package/deps/rocksdb/rocksdb/table/get_context.cc +1 -0
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -2
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +24 -4
- package/deps/rocksdb/rocksdb/util/async_file_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/util/compression.h +2 -0
- package/deps/rocksdb/rocksdb/util/thread_list_test.cc +18 -1
- package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +67 -4
- package/deps/rocksdb/rocksdb/util/threadpool_imp.h +8 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +15 -12
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -2
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb.gyp +5 -1
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
//
|
|
3
|
+
// This source code is licensed under both the GPLv2 (found in the
|
|
4
|
+
// COPYING file in the root directory) and Apache 2.0 License
|
|
5
|
+
// (found in the LICENSE.Apache file in the root directory).
|
|
6
|
+
//
|
|
7
|
+
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
8
|
+
// Use of this source code is governed by a BSD-style license that can be
|
|
9
|
+
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
10
|
+
|
|
11
|
+
#include "db/compaction/compaction_state.h"
|
|
12
|
+
|
|
13
|
+
namespace ROCKSDB_NAMESPACE {
|
|
14
|
+
|
|
15
|
+
// Returns the smallest user key of the compaction's outputs: the first
// non-empty result of SubcompactionState::SmallestUserKey() when scanning
// sub_compact_states front to back. The class declaration requires that
// vector to be ordered by increasing key range.
// Returns an empty slice when no sub-compaction reports a key.
Slice CompactionState::SmallestUserKey() {
  for (auto it = sub_compact_states.cbegin(); it != sub_compact_states.cend();
       ++it) {
    const Slice candidate = it->SmallestUserKey();
    if (candidate.empty()) {
      // Nothing reported by this sub-compaction; try the next one.
      continue;
    }
    return candidate;
  }
  // No sub-compaction produced a key.
  return Slice{nullptr, 0};
}
|
|
25
|
+
|
|
26
|
+
// Returns the largest user key of the compaction's outputs: the first
// non-empty result of SubcompactionState::LargestUserKey() when scanning
// sub_compact_states back to front. The class declaration requires that
// vector to be ordered by increasing key range.
// Returns an empty slice when no sub-compaction reports a key.
Slice CompactionState::LargestUserKey() {
  for (size_t remaining = sub_compact_states.size(); remaining > 0;
       --remaining) {
    const Slice candidate = sub_compact_states[remaining - 1].LargestUserKey();
    if (!candidate.empty()) {
      return candidate;
    }
  }
  // No sub-compaction produced a key.
  return Slice{nullptr, 0};
}
|
|
37
|
+
|
|
38
|
+
// Folds the statistics of every sub-compaction into the caller's totals.
//   compaction_stats:     receives each sub-compaction's output stats via
//                         SubcompactionState::AggregateCompactionStats().
//   compaction_job_stats: receives each sub-compaction's job stats via Add().
void CompactionState::AggregateCompactionStats(
    InternalStats::CompactionStatsFull& compaction_stats,
    CompactionJobStats& compaction_job_stats) {
  for (auto it = sub_compact_states.cbegin(); it != sub_compact_states.cend();
       ++it) {
    const SubcompactionState& sub_state = *it;
    sub_state.AggregateCompactionStats(compaction_stats);
    compaction_job_stats.Add(sub_state.compaction_job_stats);
  }
}
|
|
46
|
+
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
//
|
|
3
|
+
// This source code is licensed under both the GPLv2 (found in the
|
|
4
|
+
// COPYING file in the root directory) and Apache 2.0 License
|
|
5
|
+
// (found in the LICENSE.Apache file in the root directory).
|
|
6
|
+
//
|
|
7
|
+
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
8
|
+
// Use of this source code is governed by a BSD-style license that can be
|
|
9
|
+
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
10
|
+
|
|
11
|
+
#pragma once
|
|
12
|
+
|
|
13
|
+
#include "db/compaction/compaction.h"
|
|
14
|
+
#include "db/compaction/subcompaction_state.h"
|
|
15
|
+
#include "db/internal_stats.h"
|
|
16
|
+
|
|
17
|
+
// Data structures used for compaction_job and compaction_service_job which hold
|
|
18
|
+
// the list of sub_compact_states and the aggregated information for the
|
|
19
|
+
// compaction.
|
|
20
|
+
namespace ROCKSDB_NAMESPACE {
|
|
21
|
+
|
|
22
|
+
// Maintains state for the entire compaction
|
|
23
|
+
class CompactionState {
|
|
24
|
+
public:
|
|
25
|
+
Compaction* const compaction;
|
|
26
|
+
|
|
27
|
+
// REQUIRED: subcompaction states are stored in order of increasing key-range
|
|
28
|
+
std::vector<SubcompactionState> sub_compact_states;
|
|
29
|
+
Status status;
|
|
30
|
+
|
|
31
|
+
void AggregateCompactionStats(
|
|
32
|
+
InternalStats::CompactionStatsFull& compaction_stats,
|
|
33
|
+
CompactionJobStats& compaction_job_stats);
|
|
34
|
+
|
|
35
|
+
explicit CompactionState(Compaction* c) : compaction(c) {}
|
|
36
|
+
|
|
37
|
+
Slice SmallestUserKey();
|
|
38
|
+
|
|
39
|
+
Slice LargestUserKey();
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
//
|
|
3
|
+
// This source code is licensed under both the GPLv2 (found in the
|
|
4
|
+
// COPYING file in the root directory) and Apache 2.0 License
|
|
5
|
+
// (found in the LICENSE.Apache file in the root directory).
|
|
6
|
+
//
|
|
7
|
+
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
8
|
+
// Use of this source code is governed by a BSD-style license that can be
|
|
9
|
+
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
10
|
+
|
|
11
|
+
#include "db/compaction/subcompaction_state.h"
|
|
12
|
+
|
|
13
|
+
#include "rocksdb/sst_partitioner.h"
|
|
14
|
+
|
|
15
|
+
namespace ROCKSDB_NAMESPACE {
|
|
16
|
+
// Adds this sub-compaction's output statistics to `compaction_stats`.
// The regular output stats are always added. The penultimate-level stats are
// added, and has_penultimate_level_output is set, only when
// HasPenultimateLevelOutputs() is true.
void SubcompactionState::AggregateCompactionStats(
    InternalStats::CompactionStatsFull& compaction_stats) const {
  compaction_stats.stats.Add(compaction_outputs_.stats_);

  if (!HasPenultimateLevelOutputs()) {
    return;
  }
  compaction_stats.has_penultimate_level_output = true;
  compaction_stats.penultimate_level_stats.Add(
      penultimate_level_outputs_.stats_);
}
|
|
25
|
+
|
|
26
|
+
void SubcompactionState::FillFilesToCutForTtl() {
|
|
27
|
+
if (compaction->immutable_options()->compaction_style !=
|
|
28
|
+
CompactionStyle::kCompactionStyleLevel ||
|
|
29
|
+
compaction->immutable_options()->compaction_pri !=
|
|
30
|
+
CompactionPri::kMinOverlappingRatio ||
|
|
31
|
+
compaction->mutable_cf_options()->ttl == 0 ||
|
|
32
|
+
compaction->num_input_levels() < 2 || compaction->bottommost_level()) {
|
|
33
|
+
return;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
// We define new file with the oldest ancestor time to be younger than 1/4
|
|
37
|
+
// TTL, and an old one to be older than 1/2 TTL time.
|
|
38
|
+
int64_t temp_current_time;
|
|
39
|
+
auto get_time_status = compaction->immutable_options()->clock->GetCurrentTime(
|
|
40
|
+
&temp_current_time);
|
|
41
|
+
if (!get_time_status.ok()) {
|
|
42
|
+
return;
|
|
43
|
+
}
|
|
44
|
+
auto current_time = static_cast<uint64_t>(temp_current_time);
|
|
45
|
+
if (current_time < compaction->mutable_cf_options()->ttl) {
|
|
46
|
+
return;
|
|
47
|
+
}
|
|
48
|
+
uint64_t old_age_thres =
|
|
49
|
+
current_time - compaction->mutable_cf_options()->ttl / 2;
|
|
50
|
+
|
|
51
|
+
const std::vector<FileMetaData*>& olevel =
|
|
52
|
+
*(compaction->inputs(compaction->num_input_levels() - 1));
|
|
53
|
+
for (FileMetaData* file : olevel) {
|
|
54
|
+
// Worth filtering out by start and end?
|
|
55
|
+
uint64_t oldest_ancester_time = file->TryGetOldestAncesterTime();
|
|
56
|
+
// We put old files if they are not too small to prevent a flood
|
|
57
|
+
// of small files.
|
|
58
|
+
if (oldest_ancester_time < old_age_thres &&
|
|
59
|
+
file->fd.GetFileSize() >
|
|
60
|
+
compaction->mutable_cf_options()->target_file_size_base / 2) {
|
|
61
|
+
files_to_cut_for_ttl_.push_back(file);
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
// Returns an iterator built from both output groups of this sub-compaction:
// the penultimate-level outputs first, then the regular outputs.
OutputIterator SubcompactionState::GetOutputs() const {
  const auto& penultimate_files = penultimate_level_outputs_.outputs_;
  const auto& regular_files = compaction_outputs_.outputs_;
  return OutputIterator(penultimate_files, regular_files);
}
|
|
70
|
+
|
|
71
|
+
// Cleans up both output groups. If this sub-compaction did not succeed, it
// also evicts every output file number from `cache`.
void SubcompactionState::Cleanup(Cache* cache) {
  penultimate_level_outputs_.Cleanup();
  compaction_outputs_.Cleanup();

  const bool failed = !status.ok();
  if (failed) {
    // The compaction was not committed, so any of its files that made it into
    // the table cache must be removed again.
    for (const auto& output : GetOutputs()) {
      TableCache::Evict(cache, output.meta.fd.GetNumber());
    }
  }

  // TODO: sub_compact.io_status is not checked like status. Not sure if
  // that's intentional, so the io_status is ignored for now.
  io_status.PermitUncheckedError();
}
|
|
86
|
+
|
|
87
|
+
// Returns the smallest user key across this sub-compaction's outputs.
// Without penultimate-level outputs this is simply the regular outputs' key.
// Otherwise it is the smaller of the two groups' keys according to the column
// family's user comparator, where an empty key from one group defers to the
// other group.
Slice SubcompactionState::SmallestUserKey() const {
  if (!has_penultimate_level_outputs_) {
    return compaction_outputs_.SmallestUserKey();
  }

  const Slice regular_key = compaction_outputs_.SmallestUserKey();
  const Slice penultimate_key = penultimate_level_outputs_.SmallestUserKey();
  if (regular_key.empty()) {
    return penultimate_key;
  }
  if (penultimate_key.empty()) {
    return regular_key;
  }

  const Comparator* ucmp = compaction->column_family_data()->user_comparator();
  // On a tie the regular outputs' key is returned.
  return ucmp->Compare(regular_key, penultimate_key) > 0 ? penultimate_key
                                                         : regular_key;
}
|
|
108
|
+
|
|
109
|
+
// Returns the largest user key across this sub-compaction's outputs.
// Without penultimate-level outputs this is simply the regular outputs' key.
// Otherwise it is the larger of the two groups' keys according to the column
// family's user comparator, where an empty key from one group defers to the
// other group.
Slice SubcompactionState::LargestUserKey() const {
  if (!has_penultimate_level_outputs_) {
    return compaction_outputs_.LargestUserKey();
  }

  const Slice regular_key = compaction_outputs_.LargestUserKey();
  const Slice penultimate_key = penultimate_level_outputs_.LargestUserKey();
  if (regular_key.empty()) {
    return penultimate_key;
  }
  if (penultimate_key.empty()) {
    return regular_key;
  }

  const Comparator* ucmp = compaction->column_family_data()->user_comparator();
  // On a tie the regular outputs' key is returned.
  return ucmp->Compare(regular_key, penultimate_key) < 0 ? penultimate_key
                                                         : regular_key;
}
|
|
130
|
+
|
|
131
|
+
bool SubcompactionState::ShouldStopBefore(const Slice& internal_key) {
|
|
132
|
+
uint64_t curr_file_size = Current().GetCurrentOutputFileSize();
|
|
133
|
+
const InternalKeyComparator* icmp =
|
|
134
|
+
&compaction->column_family_data()->internal_comparator();
|
|
135
|
+
|
|
136
|
+
// Invalid local_output_split_key indicates that we do not need to split
|
|
137
|
+
if (local_output_split_key_ != nullptr && !is_split_) {
|
|
138
|
+
// Split occurs when the next key is larger than/equal to the cursor
|
|
139
|
+
if (icmp->Compare(internal_key, local_output_split_key_->Encode()) >= 0) {
|
|
140
|
+
is_split_ = true;
|
|
141
|
+
return true;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
const std::vector<FileMetaData*>& grandparents = compaction->grandparents();
|
|
146
|
+
bool grandparant_file_switched = false;
|
|
147
|
+
// Scan to find the earliest grandparent file that contains key.
|
|
148
|
+
while (grandparent_index_ < grandparents.size() &&
|
|
149
|
+
icmp->Compare(internal_key,
|
|
150
|
+
grandparents[grandparent_index_]->largest.Encode()) >
|
|
151
|
+
0) {
|
|
152
|
+
if (seen_key_) {
|
|
153
|
+
overlapped_bytes_ += grandparents[grandparent_index_]->fd.GetFileSize();
|
|
154
|
+
grandparant_file_switched = true;
|
|
155
|
+
}
|
|
156
|
+
assert(grandparent_index_ + 1 >= grandparents.size() ||
|
|
157
|
+
icmp->Compare(
|
|
158
|
+
grandparents[grandparent_index_]->largest.Encode(),
|
|
159
|
+
grandparents[grandparent_index_ + 1]->smallest.Encode()) <= 0);
|
|
160
|
+
grandparent_index_++;
|
|
161
|
+
}
|
|
162
|
+
seen_key_ = true;
|
|
163
|
+
|
|
164
|
+
if (grandparant_file_switched &&
|
|
165
|
+
overlapped_bytes_ + curr_file_size > compaction->max_compaction_bytes()) {
|
|
166
|
+
// Too much overlap for current output; start new output
|
|
167
|
+
overlapped_bytes_ = 0;
|
|
168
|
+
return true;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
if (!files_to_cut_for_ttl_.empty()) {
|
|
172
|
+
if (cur_files_to_cut_for_ttl_ != -1) {
|
|
173
|
+
// Previous key is inside the range of a file
|
|
174
|
+
if (icmp->Compare(internal_key,
|
|
175
|
+
files_to_cut_for_ttl_[cur_files_to_cut_for_ttl_]
|
|
176
|
+
->largest.Encode()) > 0) {
|
|
177
|
+
next_files_to_cut_for_ttl_ = cur_files_to_cut_for_ttl_ + 1;
|
|
178
|
+
cur_files_to_cut_for_ttl_ = -1;
|
|
179
|
+
return true;
|
|
180
|
+
}
|
|
181
|
+
} else {
|
|
182
|
+
// Look for the key position
|
|
183
|
+
while (next_files_to_cut_for_ttl_ <
|
|
184
|
+
static_cast<int>(files_to_cut_for_ttl_.size())) {
|
|
185
|
+
if (icmp->Compare(internal_key,
|
|
186
|
+
files_to_cut_for_ttl_[next_files_to_cut_for_ttl_]
|
|
187
|
+
->smallest.Encode()) >= 0) {
|
|
188
|
+
if (icmp->Compare(internal_key,
|
|
189
|
+
files_to_cut_for_ttl_[next_files_to_cut_for_ttl_]
|
|
190
|
+
->largest.Encode()) <= 0) {
|
|
191
|
+
// With in the current file
|
|
192
|
+
cur_files_to_cut_for_ttl_ = next_files_to_cut_for_ttl_;
|
|
193
|
+
return true;
|
|
194
|
+
}
|
|
195
|
+
// Beyond the current file
|
|
196
|
+
next_files_to_cut_for_ttl_++;
|
|
197
|
+
} else {
|
|
198
|
+
// Still fall into the gap
|
|
199
|
+
break;
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
return false;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
// Routes the iterator's current entry to the proper output group and adds it.
// The target group is chosen from iter.output_to_penultimate_level(). Choosing
// the penultimate group also records that this sub-compaction has
// penultimate-level outputs. Returns the status of the group's AddToOutput().
Status SubcompactionState::AddToOutput(
    const CompactionIterator& iter,
    const CompactionFileOpenFunc& open_file_func,
    const CompactionFileCloseFunc& close_file_func) {
  // Select the target output group before writing anything.
  const bool to_penultimate = iter.output_to_penultimate_level();
  is_current_penultimate_level_ = to_penultimate;
  if (to_penultimate) {
    current_outputs_ = &penultimate_level_outputs_;
    has_penultimate_level_outputs_ = true;
  } else {
    current_outputs_ = &compaction_outputs_;
  }

  return Current().AddToOutput(iter, open_file_func, close_file_func);
}
|
|
222
|
+
|
|
223
|
+
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
//
|
|
3
|
+
// This source code is licensed under both the GPLv2 (found in the
|
|
4
|
+
// COPYING file in the root directory) and Apache 2.0 License
|
|
5
|
+
// (found in the LICENSE.Apache file in the root directory).
|
|
6
|
+
//
|
|
7
|
+
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
8
|
+
// Use of this source code is governed by a BSD-style license that can be
|
|
9
|
+
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
10
|
+
|
|
11
|
+
#pragma once
|
|
12
|
+
|
|
13
|
+
#include "db/blob/blob_file_addition.h"
|
|
14
|
+
#include "db/blob/blob_garbage_meter.h"
|
|
15
|
+
#include "db/compaction/compaction.h"
|
|
16
|
+
#include "db/compaction/compaction_iterator.h"
|
|
17
|
+
#include "db/compaction/compaction_outputs.h"
|
|
18
|
+
#include "db/internal_stats.h"
|
|
19
|
+
#include "db/output_validator.h"
|
|
20
|
+
#include "db/range_del_aggregator.h"
|
|
21
|
+
|
|
22
|
+
namespace ROCKSDB_NAMESPACE {
|
|
23
|
+
|
|
24
|
+
// Maintains state and outputs for each sub-compaction
|
|
25
|
+
// It contains 2 `CompactionOutputs`:
|
|
26
|
+
// 1. one for the normal output files
|
|
27
|
+
// 2. another for the penultimate level outputs
|
|
28
|
+
// a `current` pointer maintains the current output group, when calling
|
|
29
|
+
// `AddToOutput()`, it checks the output of the current compaction_iterator key
|
|
30
|
+
// and point `current` to the target output group. By default, it just points to
|
|
31
|
+
// normal compaction_outputs, if the compaction_iterator key should be placed on
|
|
32
|
+
// the penultimate level, `current` is changed to point to
|
|
33
|
+
// `penultimate_level_outputs`.
|
|
34
|
+
// Later operations use `Current()` to get the target group.
|
|
35
|
+
//
|
|
36
|
+
// +----------+ +-----------------------------+ +---------+
|
|
37
|
+
// | *current |--------> | compaction_outputs |----->| output |
|
|
38
|
+
// +----------+ +-----------------------------+ +---------+
|
|
39
|
+
// | | output |
|
|
40
|
+
// | +---------+
|
|
41
|
+
// | | ... |
|
|
42
|
+
// |
|
|
43
|
+
// | +-----------------------------+ +---------+
|
|
44
|
+
// +-------------> | penultimate_level_outputs |----->| output |
|
|
45
|
+
// +-----------------------------+ +---------+
|
|
46
|
+
// | ... |
|
|
47
|
+
|
|
48
|
+
class SubcompactionState {
|
|
49
|
+
public:
|
|
50
|
+
const Compaction* compaction;
|
|
51
|
+
|
|
52
|
+
// The boundaries of the key-range this compaction is interested in. No two
|
|
53
|
+
// sub-compactions may have overlapping key-ranges.
|
|
54
|
+
// 'start' is inclusive, 'end' is exclusive, and nullptr means unbounded
|
|
55
|
+
const Slice *start, *end;
|
|
56
|
+
|
|
57
|
+
// The return status of this sub-compaction
|
|
58
|
+
Status status;
|
|
59
|
+
|
|
60
|
+
// The return IO Status of this sub-compaction
|
|
61
|
+
IOStatus io_status;
|
|
62
|
+
|
|
63
|
+
// Notify on sub-compaction completion only if listener was notified on
|
|
64
|
+
// sub-compaction begin.
|
|
65
|
+
bool notify_on_subcompaction_completion = false;
|
|
66
|
+
|
|
67
|
+
// compaction job stats for this sub-compaction
|
|
68
|
+
CompactionJobStats compaction_job_stats;
|
|
69
|
+
|
|
70
|
+
// sub-compaction job id, which is used to identify different sub-compaction
|
|
71
|
+
// within the same compaction job.
|
|
72
|
+
const uint32_t sub_job_id;
|
|
73
|
+
|
|
74
|
+
Slice SmallestUserKey() const;
|
|
75
|
+
|
|
76
|
+
Slice LargestUserKey() const;
|
|
77
|
+
|
|
78
|
+
// Get all outputs from the subcompaction. For per_key_placement compaction,
|
|
79
|
+
// it returns both the last level outputs and penultimate level outputs.
|
|
80
|
+
OutputIterator GetOutputs() const;
|
|
81
|
+
|
|
82
|
+
// Assign range dels aggregator, for each range_del, it can only be assigned
|
|
83
|
+
// to one output level, for per_key_placement, it's going to be the
|
|
84
|
+
// penultimate level.
|
|
85
|
+
void AssignRangeDelAggregator(
|
|
86
|
+
std::unique_ptr<CompactionRangeDelAggregator>&& range_del_agg) {
|
|
87
|
+
if (compaction->SupportsPerKeyPlacement()) {
|
|
88
|
+
penultimate_level_outputs_.AssignRangeDelAggregator(
|
|
89
|
+
std::move(range_del_agg));
|
|
90
|
+
} else {
|
|
91
|
+
compaction_outputs_.AssignRangeDelAggregator(std::move(range_del_agg));
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
void RemoveLastEmptyOutput() {
|
|
96
|
+
compaction_outputs_.RemoveLastEmptyOutput();
|
|
97
|
+
penultimate_level_outputs_.RemoveLastEmptyOutput();
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
#ifndef ROCKSDB_LITE
|
|
101
|
+
void BuildSubcompactionJobInfo(
|
|
102
|
+
SubcompactionJobInfo& subcompaction_job_info) const {
|
|
103
|
+
const Compaction* c = compaction;
|
|
104
|
+
const ColumnFamilyData* cfd = c->column_family_data();
|
|
105
|
+
|
|
106
|
+
subcompaction_job_info.cf_id = cfd->GetID();
|
|
107
|
+
subcompaction_job_info.cf_name = cfd->GetName();
|
|
108
|
+
subcompaction_job_info.status = status;
|
|
109
|
+
subcompaction_job_info.subcompaction_job_id = static_cast<int>(sub_job_id);
|
|
110
|
+
subcompaction_job_info.base_input_level = c->start_level();
|
|
111
|
+
subcompaction_job_info.output_level = c->output_level();
|
|
112
|
+
subcompaction_job_info.stats = compaction_job_stats;
|
|
113
|
+
}
|
|
114
|
+
#endif // !ROCKSDB_LITE
|
|
115
|
+
|
|
116
|
+
SubcompactionState() = delete;
|
|
117
|
+
SubcompactionState(const SubcompactionState&) = delete;
|
|
118
|
+
SubcompactionState& operator=(const SubcompactionState&) = delete;
|
|
119
|
+
|
|
120
|
+
SubcompactionState(Compaction* c, Slice* _start, Slice* _end,
|
|
121
|
+
uint32_t _sub_job_id)
|
|
122
|
+
: compaction(c),
|
|
123
|
+
start(_start),
|
|
124
|
+
end(_end),
|
|
125
|
+
sub_job_id(_sub_job_id),
|
|
126
|
+
compaction_outputs_(c, /*is_penultimate_level=*/false),
|
|
127
|
+
penultimate_level_outputs_(c, /*is_penultimate_level=*/true) {
|
|
128
|
+
assert(compaction != nullptr);
|
|
129
|
+
const InternalKeyComparator* icmp =
|
|
130
|
+
&compaction->column_family_data()->internal_comparator();
|
|
131
|
+
const InternalKey* output_split_key = compaction->GetOutputSplitKey();
|
|
132
|
+
// Invalid output_split_key indicates that we do not need to split
|
|
133
|
+
if (output_split_key != nullptr) {
|
|
134
|
+
// We may only split the output when the cursor is in the range. Split
|
|
135
|
+
if ((end == nullptr || icmp->user_comparator()->Compare(
|
|
136
|
+
ExtractUserKey(output_split_key->Encode()),
|
|
137
|
+
ExtractUserKey(*end)) < 0) &&
|
|
138
|
+
(start == nullptr || icmp->user_comparator()->Compare(
|
|
139
|
+
ExtractUserKey(output_split_key->Encode()),
|
|
140
|
+
ExtractUserKey(*start)) > 0)) {
|
|
141
|
+
local_output_split_key_ = output_split_key;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
SubcompactionState(SubcompactionState&& state) noexcept
|
|
147
|
+
: compaction(state.compaction),
|
|
148
|
+
start(state.start),
|
|
149
|
+
end(state.end),
|
|
150
|
+
status(std::move(state.status)),
|
|
151
|
+
io_status(std::move(state.io_status)),
|
|
152
|
+
notify_on_subcompaction_completion(
|
|
153
|
+
state.notify_on_subcompaction_completion),
|
|
154
|
+
compaction_job_stats(std::move(state.compaction_job_stats)),
|
|
155
|
+
sub_job_id(state.sub_job_id),
|
|
156
|
+
files_to_cut_for_ttl_(std::move(state.files_to_cut_for_ttl_)),
|
|
157
|
+
cur_files_to_cut_for_ttl_(state.cur_files_to_cut_for_ttl_),
|
|
158
|
+
next_files_to_cut_for_ttl_(state.next_files_to_cut_for_ttl_),
|
|
159
|
+
grandparent_index_(state.grandparent_index_),
|
|
160
|
+
overlapped_bytes_(state.overlapped_bytes_),
|
|
161
|
+
seen_key_(state.seen_key_),
|
|
162
|
+
compaction_outputs_(std::move(state.compaction_outputs_)),
|
|
163
|
+
penultimate_level_outputs_(std::move(state.penultimate_level_outputs_)),
|
|
164
|
+
is_current_penultimate_level_(state.is_current_penultimate_level_),
|
|
165
|
+
has_penultimate_level_outputs_(state.has_penultimate_level_outputs_) {
|
|
166
|
+
current_outputs_ = is_current_penultimate_level_
|
|
167
|
+
? &penultimate_level_outputs_
|
|
168
|
+
: &compaction_outputs_;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
bool HasPenultimateLevelOutputs() const {
|
|
172
|
+
return has_penultimate_level_outputs_ ||
|
|
173
|
+
penultimate_level_outputs_.HasRangeDel();
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
void FillFilesToCutForTtl();
|
|
177
|
+
|
|
178
|
+
// Returns true iff we should stop building the current output
|
|
179
|
+
// before processing "internal_key".
|
|
180
|
+
bool ShouldStopBefore(const Slice& internal_key);
|
|
181
|
+
|
|
182
|
+
bool IsCurrentPenultimateLevel() const {
|
|
183
|
+
return is_current_penultimate_level_;
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
// Add all the new files from this compaction to version_edit
|
|
187
|
+
void AddOutputsEdit(VersionEdit* out_edit) const {
|
|
188
|
+
for (const auto& file : penultimate_level_outputs_.outputs_) {
|
|
189
|
+
out_edit->AddFile(compaction->GetPenultimateLevel(), file.meta);
|
|
190
|
+
}
|
|
191
|
+
for (const auto& file : compaction_outputs_.outputs_) {
|
|
192
|
+
out_edit->AddFile(compaction->output_level(), file.meta);
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
void Cleanup(Cache* cache);
|
|
197
|
+
|
|
198
|
+
void AggregateCompactionStats(
|
|
199
|
+
InternalStats::CompactionStatsFull& compaction_stats) const;
|
|
200
|
+
|
|
201
|
+
CompactionOutputs& Current() const {
|
|
202
|
+
assert(current_outputs_);
|
|
203
|
+
return *current_outputs_;
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
// Add compaction_iterator key/value to the `Current` output group.
|
|
207
|
+
Status AddToOutput(const CompactionIterator& iter,
|
|
208
|
+
const CompactionFileOpenFunc& open_file_func,
|
|
209
|
+
const CompactionFileCloseFunc& close_file_func);
|
|
210
|
+
|
|
211
|
+
// Close all compaction output files, both output_to_penultimate_level outputs
|
|
212
|
+
// and normal outputs.
|
|
213
|
+
Status CloseCompactionFiles(const Status& curr_status,
|
|
214
|
+
const CompactionFileOpenFunc& open_file_func,
|
|
215
|
+
const CompactionFileCloseFunc& close_file_func) {
|
|
216
|
+
// Call FinishCompactionOutputFile() even if status is not ok: it needs to
|
|
217
|
+
// close the output file.
|
|
218
|
+
Status s = penultimate_level_outputs_.CloseOutput(
|
|
219
|
+
curr_status, open_file_func, close_file_func);
|
|
220
|
+
s = compaction_outputs_.CloseOutput(s, open_file_func, close_file_func);
|
|
221
|
+
return s;
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
private:
|
|
225
|
+
// Some identified files with old oldest ancester time and the range should be
|
|
226
|
+
// isolated out so that the output file(s) in that range can be merged down
|
|
227
|
+
// for TTL and clear the timestamps for the range.
|
|
228
|
+
std::vector<FileMetaData*> files_to_cut_for_ttl_;
|
|
229
|
+
int cur_files_to_cut_for_ttl_ = -1;
|
|
230
|
+
int next_files_to_cut_for_ttl_ = 0;
|
|
231
|
+
|
|
232
|
+
// An index that used to speed up ShouldStopBefore().
|
|
233
|
+
size_t grandparent_index_ = 0;
|
|
234
|
+
// The number of bytes overlapping between the current output and
|
|
235
|
+
// grandparent files used in ShouldStopBefore().
|
|
236
|
+
uint64_t overlapped_bytes_ = 0;
|
|
237
|
+
// A flag determines whether the key has been seen in ShouldStopBefore()
|
|
238
|
+
bool seen_key_ = false;
|
|
239
|
+
|
|
240
|
+
// A flag determines if this subcompaction has been split by the cursor
|
|
241
|
+
bool is_split_ = false;
|
|
242
|
+
|
|
243
|
+
// We also maintain the output split key for each subcompaction to avoid
|
|
244
|
+
// repetitive comparison in ShouldStopBefore()
|
|
245
|
+
const InternalKey* local_output_split_key_ = nullptr;
|
|
246
|
+
|
|
247
|
+
// State kept for output being generated
|
|
248
|
+
CompactionOutputs compaction_outputs_;
|
|
249
|
+
CompactionOutputs penultimate_level_outputs_;
|
|
250
|
+
CompactionOutputs* current_outputs_ = &compaction_outputs_;
|
|
251
|
+
bool is_current_penultimate_level_ = false;
|
|
252
|
+
bool has_penultimate_level_outputs_ = false;
|
|
253
|
+
};
|
|
254
|
+
|
|
255
|
+
} // namespace ROCKSDB_NAMESPACE
|