@nxtedition/rocksdb 7.0.23 → 7.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. package/binding.cc +3 -1
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +5 -0
  3. package/deps/rocksdb/rocksdb/Makefile +6 -2
  4. package/deps/rocksdb/rocksdb/TARGETS +14 -0
  5. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +4 -1
  6. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +20 -0
  7. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +2 -2
  8. package/deps/rocksdb/rocksdb/cache/cache_test.cc +44 -31
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +491 -722
  10. package/deps/rocksdb/rocksdb/cache/clock_cache.h +468 -2
  11. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -1
  12. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +51 -52
  13. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +28 -16
  14. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +12 -1
  15. package/deps/rocksdb/rocksdb/cache/lru_cache.h +1 -0
  16. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +170 -36
  17. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +1 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +63 -36
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +4 -6
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +57 -38
  21. package/deps/rocksdb/rocksdb/db/blob/blob_read_request.h +58 -0
  22. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +164 -74
  23. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +42 -29
  24. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +419 -62
  25. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +208 -8
  26. package/deps/rocksdb/rocksdb/db/c.cc +68 -0
  27. package/deps/rocksdb/rocksdb/db/c_test.c +95 -2
  28. package/deps/rocksdb/rocksdb/db/column_family.cc +12 -3
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +92 -15
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +76 -4
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +52 -1
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +30 -1
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +126 -0
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +203 -1584
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +93 -26
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +87 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +314 -0
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +328 -0
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +32 -6
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +4 -1
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +7 -3
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +174 -33
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +474 -7
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -2
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +825 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_state.cc +46 -0
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_state.h +42 -0
  48. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +223 -0
  49. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +255 -0
  50. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +1253 -0
  51. package/deps/rocksdb/rocksdb/db/corruption_test.cc +32 -8
  52. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +3 -1
  53. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +13 -8
  54. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +376 -0
  55. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +103 -78
  56. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +4 -6
  57. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +0 -8
  58. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +10 -3
  59. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +21 -6
  60. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +19 -1
  61. package/deps/rocksdb/rocksdb/db/db_iter.cc +91 -14
  62. package/deps/rocksdb/rocksdb/db/db_iter.h +5 -0
  63. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +33 -0
  64. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +79 -0
  65. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +2 -0
  66. package/deps/rocksdb/rocksdb/db/db_test2.cc +1 -1
  67. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +5 -2
  68. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +185 -0
  69. package/deps/rocksdb/rocksdb/db/dbformat.cc +1 -4
  70. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -8
  71. package/deps/rocksdb/rocksdb/db/internal_stats.cc +71 -29
  72. package/deps/rocksdb/rocksdb/db/internal_stats.h +160 -5
  73. package/deps/rocksdb/rocksdb/db/log_reader.cc +29 -3
  74. package/deps/rocksdb/rocksdb/db/log_reader.h +12 -3
  75. package/deps/rocksdb/rocksdb/db/repair_test.cc +1 -3
  76. package/deps/rocksdb/rocksdb/db/version_edit.cc +6 -0
  77. package/deps/rocksdb/rocksdb/db/version_set.cc +93 -129
  78. package/deps/rocksdb/rocksdb/db/version_set.h +4 -4
  79. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +2 -2
  80. package/deps/rocksdb/rocksdb/db/version_set_test.cc +42 -35
  81. package/deps/rocksdb/rocksdb/db/write_batch.cc +10 -2
  82. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +4 -1
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +10 -4
  84. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +3 -3
  85. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +3 -2
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +5 -1
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +140 -8
  89. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +12 -0
  90. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +46 -7
  91. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +7 -0
  92. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +27 -7
  93. package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +8 -0
  94. package/deps/rocksdb/rocksdb/env/env_posix.cc +14 -0
  95. package/deps/rocksdb/rocksdb/env/env_test.cc +130 -1
  96. package/deps/rocksdb/rocksdb/env/fs_posix.cc +7 -1
  97. package/deps/rocksdb/rocksdb/env/io_posix.cc +18 -50
  98. package/deps/rocksdb/rocksdb/env/io_posix.h +53 -6
  99. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +8 -10
  100. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +3 -7
  101. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +239 -259
  102. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +84 -19
  103. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +24 -4
  104. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +1 -1
  105. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +31 -1
  106. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +11 -7
  107. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +2 -0
  108. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +14 -0
  109. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +20 -0
  110. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +37 -13
  111. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +7 -0
  112. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +14 -0
  113. package/deps/rocksdb/rocksdb/include/rocksdb/threadpool.h +9 -0
  114. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +13 -13
  115. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +12 -2
  116. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +38 -0
  117. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +7 -1
  118. package/deps/rocksdb/rocksdb/port/win/env_win.cc +17 -0
  119. package/deps/rocksdb/rocksdb/port/win/env_win.h +8 -0
  120. package/deps/rocksdb/rocksdb/port/win/io_win.cc +6 -3
  121. package/deps/rocksdb/rocksdb/src.mk +5 -0
  122. package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -2
  123. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1 -1
  124. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +5 -2
  125. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +1 -1
  126. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +15 -12
  127. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +5 -4
  128. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +2 -1
  129. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +1 -1
  130. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -4
  131. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +1 -2
  132. package/deps/rocksdb/rocksdb/table/get_context.cc +1 -0
  133. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -2
  134. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +24 -4
  135. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +1 -1
  136. package/deps/rocksdb/rocksdb/util/compression.h +2 -0
  137. package/deps/rocksdb/rocksdb/util/thread_list_test.cc +18 -1
  138. package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +67 -4
  139. package/deps/rocksdb/rocksdb/util/threadpool_imp.h +8 -0
  140. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +15 -12
  141. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -2
  142. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +1 -1
  143. package/deps/rocksdb/rocksdb.gyp +5 -1
  144. package/package.json +1 -1
  145. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  146. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -0,0 +1,46 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ //
3
+ // This source code is licensed under both the GPLv2 (found in the
4
+ // COPYING file in the root directory) and Apache 2.0 License
5
+ // (found in the LICENSE.Apache file in the root directory).
6
+ //
7
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
8
+ // Use of this source code is governed by a BSD-style license that can be
9
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
10
+
11
+ #include "db/compaction/compaction_state.h"
12
+
13
+ namespace ROCKSDB_NAMESPACE {
14
+
15
+ Slice CompactionState::SmallestUserKey() {
16
+ for (const auto& sub_compact_state : sub_compact_states) {
17
+ Slice smallest = sub_compact_state.SmallestUserKey();
18
+ if (!smallest.empty()) {
19
+ return smallest;
20
+ }
21
+ }
22
+ // If there is no finished output, return an empty slice.
23
+ return Slice{nullptr, 0};
24
+ }
25
+
26
+ Slice CompactionState::LargestUserKey() {
27
+ for (auto it = sub_compact_states.rbegin(); it < sub_compact_states.rend();
28
+ ++it) {
29
+ Slice largest = it->LargestUserKey();
30
+ if (!largest.empty()) {
31
+ return largest;
32
+ }
33
+ }
34
+ // If there is no finished output, return an empty slice.
35
+ return Slice{nullptr, 0};
36
+ }
37
+
38
+ void CompactionState::AggregateCompactionStats(
39
+ InternalStats::CompactionStatsFull& compaction_stats,
40
+ CompactionJobStats& compaction_job_stats) {
41
+ for (const auto& sc : sub_compact_states) {
42
+ sc.AggregateCompactionStats(compaction_stats);
43
+ compaction_job_stats.Add(sc.compaction_job_stats);
44
+ }
45
+ }
46
+ } // namespace ROCKSDB_NAMESPACE
@@ -0,0 +1,42 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ //
3
+ // This source code is licensed under both the GPLv2 (found in the
4
+ // COPYING file in the root directory) and Apache 2.0 License
5
+ // (found in the LICENSE.Apache file in the root directory).
6
+ //
7
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
8
+ // Use of this source code is governed by a BSD-style license that can be
9
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
10
+
11
+ #pragma once
12
+
13
+ #include "db/compaction/compaction.h"
14
+ #include "db/compaction/subcompaction_state.h"
15
+ #include "db/internal_stats.h"
16
+
17
+ // Data structure shared by compaction_job and compaction_service_job. It holds
18
+ // the list of sub_compact_states and the aggregated information for the
19
+ // compaction.
20
+ namespace ROCKSDB_NAMESPACE {
21
+
22
+ // Maintains state for the entire compaction
23
+ class CompactionState {
24
+ public:
25
+ Compaction* const compaction;
26
+
27
+ // REQUIRED: subcompaction states are stored in order of increasing key-range
28
+ std::vector<SubcompactionState> sub_compact_states;
29
+ Status status;
30
+
31
+ void AggregateCompactionStats(
32
+ InternalStats::CompactionStatsFull& compaction_stats,
33
+ CompactionJobStats& compaction_job_stats);
34
+
35
+ explicit CompactionState(Compaction* c) : compaction(c) {}
36
+
37
+ Slice SmallestUserKey();
38
+
39
+ Slice LargestUserKey();
40
+ };
41
+
42
+ } // namespace ROCKSDB_NAMESPACE
@@ -0,0 +1,223 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ //
3
+ // This source code is licensed under both the GPLv2 (found in the
4
+ // COPYING file in the root directory) and Apache 2.0 License
5
+ // (found in the LICENSE.Apache file in the root directory).
6
+ //
7
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
8
+ // Use of this source code is governed by a BSD-style license that can be
9
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
10
+
11
+ #include "db/compaction/subcompaction_state.h"
12
+
13
+ #include "rocksdb/sst_partitioner.h"
14
+
15
+ namespace ROCKSDB_NAMESPACE {
16
+ void SubcompactionState::AggregateCompactionStats(
17
+ InternalStats::CompactionStatsFull& compaction_stats) const {
18
+ compaction_stats.stats.Add(compaction_outputs_.stats_);
19
+ if (HasPenultimateLevelOutputs()) {
20
+ compaction_stats.has_penultimate_level_output = true;
21
+ compaction_stats.penultimate_level_stats.Add(
22
+ penultimate_level_outputs_.stats_);
23
+ }
24
+ }
25
+
26
+ void SubcompactionState::FillFilesToCutForTtl() {
27
+ if (compaction->immutable_options()->compaction_style !=
28
+ CompactionStyle::kCompactionStyleLevel ||
29
+ compaction->immutable_options()->compaction_pri !=
30
+ CompactionPri::kMinOverlappingRatio ||
31
+ compaction->mutable_cf_options()->ttl == 0 ||
32
+ compaction->num_input_levels() < 2 || compaction->bottommost_level()) {
33
+ return;
34
+ }
35
+
36
+ // We define new file with the oldest ancestor time to be younger than 1/4
37
+ // TTL, and an old one to be older than 1/2 TTL time.
38
+ int64_t temp_current_time;
39
+ auto get_time_status = compaction->immutable_options()->clock->GetCurrentTime(
40
+ &temp_current_time);
41
+ if (!get_time_status.ok()) {
42
+ return;
43
+ }
44
+ auto current_time = static_cast<uint64_t>(temp_current_time);
45
+ if (current_time < compaction->mutable_cf_options()->ttl) {
46
+ return;
47
+ }
48
+ uint64_t old_age_thres =
49
+ current_time - compaction->mutable_cf_options()->ttl / 2;
50
+
51
+ const std::vector<FileMetaData*>& olevel =
52
+ *(compaction->inputs(compaction->num_input_levels() - 1));
53
+ for (FileMetaData* file : olevel) {
54
+ // Worth filtering out by start and end?
55
+ uint64_t oldest_ancester_time = file->TryGetOldestAncesterTime();
56
+ // We put old files if they are not too small to prevent a flood
57
+ // of small files.
58
+ if (oldest_ancester_time < old_age_thres &&
59
+ file->fd.GetFileSize() >
60
+ compaction->mutable_cf_options()->target_file_size_base / 2) {
61
+ files_to_cut_for_ttl_.push_back(file);
62
+ }
63
+ }
64
+ }
65
+
66
+ OutputIterator SubcompactionState::GetOutputs() const {
67
+ return OutputIterator(penultimate_level_outputs_.outputs_,
68
+ compaction_outputs_.outputs_);
69
+ }
70
+
71
+ void SubcompactionState::Cleanup(Cache* cache) {
72
+ penultimate_level_outputs_.Cleanup();
73
+ compaction_outputs_.Cleanup();
74
+
75
+ if (!status.ok()) {
76
+ for (const auto& out : GetOutputs()) {
77
+ // If this file was inserted into the table cache then remove
78
+ // them here because this compaction was not committed.
79
+ TableCache::Evict(cache, out.meta.fd.GetNumber());
80
+ }
81
+ }
82
+ // TODO: sub_compact.io_status is not checked like status. Not sure if thats
83
+ // intentional. So ignoring the io_status as of now.
84
+ io_status.PermitUncheckedError();
85
+ }
86
+
87
+ Slice SubcompactionState::SmallestUserKey() const {
88
+ if (has_penultimate_level_outputs_) {
89
+ Slice a = compaction_outputs_.SmallestUserKey();
90
+ Slice b = penultimate_level_outputs_.SmallestUserKey();
91
+ if (a.empty()) {
92
+ return b;
93
+ }
94
+ if (b.empty()) {
95
+ return a;
96
+ }
97
+ const Comparator* user_cmp =
98
+ compaction->column_family_data()->user_comparator();
99
+ if (user_cmp->Compare(a, b) > 0) {
100
+ return b;
101
+ } else {
102
+ return a;
103
+ }
104
+ } else {
105
+ return compaction_outputs_.SmallestUserKey();
106
+ }
107
+ }
108
+
109
+ Slice SubcompactionState::LargestUserKey() const {
110
+ if (has_penultimate_level_outputs_) {
111
+ Slice a = compaction_outputs_.LargestUserKey();
112
+ Slice b = penultimate_level_outputs_.LargestUserKey();
113
+ if (a.empty()) {
114
+ return b;
115
+ }
116
+ if (b.empty()) {
117
+ return a;
118
+ }
119
+ const Comparator* user_cmp =
120
+ compaction->column_family_data()->user_comparator();
121
+ if (user_cmp->Compare(a, b) < 0) {
122
+ return b;
123
+ } else {
124
+ return a;
125
+ }
126
+ } else {
127
+ return compaction_outputs_.LargestUserKey();
128
+ }
129
+ }
130
+
131
+ bool SubcompactionState::ShouldStopBefore(const Slice& internal_key) {
132
+ uint64_t curr_file_size = Current().GetCurrentOutputFileSize();
133
+ const InternalKeyComparator* icmp =
134
+ &compaction->column_family_data()->internal_comparator();
135
+
136
+ // Invalid local_output_split_key indicates that we do not need to split
137
+ if (local_output_split_key_ != nullptr && !is_split_) {
138
+ // Split occurs when the next key is larger than/equal to the cursor
139
+ if (icmp->Compare(internal_key, local_output_split_key_->Encode()) >= 0) {
140
+ is_split_ = true;
141
+ return true;
142
+ }
143
+ }
144
+
145
+ const std::vector<FileMetaData*>& grandparents = compaction->grandparents();
146
+ bool grandparant_file_switched = false;
147
+ // Scan to find the earliest grandparent file that contains key.
148
+ while (grandparent_index_ < grandparents.size() &&
149
+ icmp->Compare(internal_key,
150
+ grandparents[grandparent_index_]->largest.Encode()) >
151
+ 0) {
152
+ if (seen_key_) {
153
+ overlapped_bytes_ += grandparents[grandparent_index_]->fd.GetFileSize();
154
+ grandparant_file_switched = true;
155
+ }
156
+ assert(grandparent_index_ + 1 >= grandparents.size() ||
157
+ icmp->Compare(
158
+ grandparents[grandparent_index_]->largest.Encode(),
159
+ grandparents[grandparent_index_ + 1]->smallest.Encode()) <= 0);
160
+ grandparent_index_++;
161
+ }
162
+ seen_key_ = true;
163
+
164
+ if (grandparant_file_switched &&
165
+ overlapped_bytes_ + curr_file_size > compaction->max_compaction_bytes()) {
166
+ // Too much overlap for current output; start new output
167
+ overlapped_bytes_ = 0;
168
+ return true;
169
+ }
170
+
171
+ if (!files_to_cut_for_ttl_.empty()) {
172
+ if (cur_files_to_cut_for_ttl_ != -1) {
173
+ // Previous key is inside the range of a file
174
+ if (icmp->Compare(internal_key,
175
+ files_to_cut_for_ttl_[cur_files_to_cut_for_ttl_]
176
+ ->largest.Encode()) > 0) {
177
+ next_files_to_cut_for_ttl_ = cur_files_to_cut_for_ttl_ + 1;
178
+ cur_files_to_cut_for_ttl_ = -1;
179
+ return true;
180
+ }
181
+ } else {
182
+ // Look for the key position
183
+ while (next_files_to_cut_for_ttl_ <
184
+ static_cast<int>(files_to_cut_for_ttl_.size())) {
185
+ if (icmp->Compare(internal_key,
186
+ files_to_cut_for_ttl_[next_files_to_cut_for_ttl_]
187
+ ->smallest.Encode()) >= 0) {
188
+ if (icmp->Compare(internal_key,
189
+ files_to_cut_for_ttl_[next_files_to_cut_for_ttl_]
190
+ ->largest.Encode()) <= 0) {
191
+ // With in the current file
192
+ cur_files_to_cut_for_ttl_ = next_files_to_cut_for_ttl_;
193
+ return true;
194
+ }
195
+ // Beyond the current file
196
+ next_files_to_cut_for_ttl_++;
197
+ } else {
198
+ // Still fall into the gap
199
+ break;
200
+ }
201
+ }
202
+ }
203
+ }
204
+
205
+ return false;
206
+ }
207
+
208
+ Status SubcompactionState::AddToOutput(
209
+ const CompactionIterator& iter,
210
+ const CompactionFileOpenFunc& open_file_func,
211
+ const CompactionFileCloseFunc& close_file_func) {
212
+ // update target output first
213
+ is_current_penultimate_level_ = iter.output_to_penultimate_level();
214
+ current_outputs_ = is_current_penultimate_level_ ? &penultimate_level_outputs_
215
+ : &compaction_outputs_;
216
+ if (is_current_penultimate_level_) {
217
+ has_penultimate_level_outputs_ = true;
218
+ }
219
+
220
+ return Current().AddToOutput(iter, open_file_func, close_file_func);
221
+ }
222
+
223
+ } // namespace ROCKSDB_NAMESPACE
@@ -0,0 +1,255 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ //
3
+ // This source code is licensed under both the GPLv2 (found in the
4
+ // COPYING file in the root directory) and Apache 2.0 License
5
+ // (found in the LICENSE.Apache file in the root directory).
6
+ //
7
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
8
+ // Use of this source code is governed by a BSD-style license that can be
9
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
10
+
11
+ #pragma once
12
+
13
+ #include "db/blob/blob_file_addition.h"
14
+ #include "db/blob/blob_garbage_meter.h"
15
+ #include "db/compaction/compaction.h"
16
+ #include "db/compaction/compaction_iterator.h"
17
+ #include "db/compaction/compaction_outputs.h"
18
+ #include "db/internal_stats.h"
19
+ #include "db/output_validator.h"
20
+ #include "db/range_del_aggregator.h"
21
+
22
+ namespace ROCKSDB_NAMESPACE {
23
+
24
+ // Maintains state and outputs for each sub-compaction
25
+ // It contains 2 `CompactionOutputs`:
26
+ // 1. one for the normal output files
27
+ // 2. another for the penultimate level outputs
28
+ // a `current` pointer maintains the current output group, when calling
29
+ // `AddToOutput()`, it checks the output of the current compaction_iterator key
30
+ // and point `current` to the target output group. By default, it just points to
31
+ // normal compaction_outputs, if the compaction_iterator key should be placed on
32
+ // the penultimate level, `current` is changed to point to
33
+ // `penultimate_level_outputs`.
34
+ // Later operations use `Current()` to get the target group.
35
+ //
36
+ // +----------+ +-----------------------------+ +---------+
37
+ // | *current |--------> | compaction_outputs |----->| output |
38
+ // +----------+ +-----------------------------+ +---------+
39
+ // | | output |
40
+ // | +---------+
41
+ // | | ... |
42
+ // |
43
+ // | +-----------------------------+ +---------+
44
+ // +-------------> | penultimate_level_outputs |----->| output |
45
+ // +-----------------------------+ +---------+
46
+ // | ... |
47
+
48
+ class SubcompactionState {
49
+ public:
50
+ const Compaction* compaction;
51
+
52
+ // The boundaries of the key-range this compaction is interested in. No two
53
+ // sub-compactions may have overlapping key-ranges.
54
+ // 'start' is inclusive, 'end' is exclusive, and nullptr means unbounded
55
+ const Slice *start, *end;
56
+
57
+ // The return status of this sub-compaction
58
+ Status status;
59
+
60
+ // The return IO Status of this sub-compaction
61
+ IOStatus io_status;
62
+
63
+ // Notify on sub-compaction completion only if listener was notified on
64
+ // sub-compaction begin.
65
+ bool notify_on_subcompaction_completion = false;
66
+
67
+ // compaction job stats for this sub-compaction
68
+ CompactionJobStats compaction_job_stats;
69
+
70
+ // sub-compaction job id, which is used to identify different sub-compaction
71
+ // within the same compaction job.
72
+ const uint32_t sub_job_id;
73
+
74
+ Slice SmallestUserKey() const;
75
+
76
+ Slice LargestUserKey() const;
77
+
78
+ // Get all outputs from the subcompaction. For per_key_placement compaction,
79
+ // it returns both the last level outputs and penultimate level outputs.
80
+ OutputIterator GetOutputs() const;
81
+
82
+ // Assign range dels aggregator, for each range_del, it can only be assigned
83
+ // to one output level, for per_key_placement, it's going to be the
84
+ // penultimate level.
85
+ void AssignRangeDelAggregator(
86
+ std::unique_ptr<CompactionRangeDelAggregator>&& range_del_agg) {
87
+ if (compaction->SupportsPerKeyPlacement()) {
88
+ penultimate_level_outputs_.AssignRangeDelAggregator(
89
+ std::move(range_del_agg));
90
+ } else {
91
+ compaction_outputs_.AssignRangeDelAggregator(std::move(range_del_agg));
92
+ }
93
+ }
94
+
95
+ void RemoveLastEmptyOutput() {
96
+ compaction_outputs_.RemoveLastEmptyOutput();
97
+ penultimate_level_outputs_.RemoveLastEmptyOutput();
98
+ }
99
+
100
+ #ifndef ROCKSDB_LITE
101
+ void BuildSubcompactionJobInfo(
102
+ SubcompactionJobInfo& subcompaction_job_info) const {
103
+ const Compaction* c = compaction;
104
+ const ColumnFamilyData* cfd = c->column_family_data();
105
+
106
+ subcompaction_job_info.cf_id = cfd->GetID();
107
+ subcompaction_job_info.cf_name = cfd->GetName();
108
+ subcompaction_job_info.status = status;
109
+ subcompaction_job_info.subcompaction_job_id = static_cast<int>(sub_job_id);
110
+ subcompaction_job_info.base_input_level = c->start_level();
111
+ subcompaction_job_info.output_level = c->output_level();
112
+ subcompaction_job_info.stats = compaction_job_stats;
113
+ }
114
+ #endif // !ROCKSDB_LITE
115
+
116
+ SubcompactionState() = delete;
117
+ SubcompactionState(const SubcompactionState&) = delete;
118
+ SubcompactionState& operator=(const SubcompactionState&) = delete;
119
+
120
+ SubcompactionState(Compaction* c, Slice* _start, Slice* _end,
121
+ uint32_t _sub_job_id)
122
+ : compaction(c),
123
+ start(_start),
124
+ end(_end),
125
+ sub_job_id(_sub_job_id),
126
+ compaction_outputs_(c, /*is_penultimate_level=*/false),
127
+ penultimate_level_outputs_(c, /*is_penultimate_level=*/true) {
128
+ assert(compaction != nullptr);
129
+ const InternalKeyComparator* icmp =
130
+ &compaction->column_family_data()->internal_comparator();
131
+ const InternalKey* output_split_key = compaction->GetOutputSplitKey();
132
+ // Invalid output_split_key indicates that we do not need to split
133
+ if (output_split_key != nullptr) {
134
+ // We may only split the output when the cursor is in the range. Split
135
+ if ((end == nullptr || icmp->user_comparator()->Compare(
136
+ ExtractUserKey(output_split_key->Encode()),
137
+ ExtractUserKey(*end)) < 0) &&
138
+ (start == nullptr || icmp->user_comparator()->Compare(
139
+ ExtractUserKey(output_split_key->Encode()),
140
+ ExtractUserKey(*start)) > 0)) {
141
+ local_output_split_key_ = output_split_key;
142
+ }
143
+ }
144
+ }
145
+
146
+ SubcompactionState(SubcompactionState&& state) noexcept
147
+ : compaction(state.compaction),
148
+ start(state.start),
149
+ end(state.end),
150
+ status(std::move(state.status)),
151
+ io_status(std::move(state.io_status)),
152
+ notify_on_subcompaction_completion(
153
+ state.notify_on_subcompaction_completion),
154
+ compaction_job_stats(std::move(state.compaction_job_stats)),
155
+ sub_job_id(state.sub_job_id),
156
+ files_to_cut_for_ttl_(std::move(state.files_to_cut_for_ttl_)),
157
+ cur_files_to_cut_for_ttl_(state.cur_files_to_cut_for_ttl_),
158
+ next_files_to_cut_for_ttl_(state.next_files_to_cut_for_ttl_),
159
+ grandparent_index_(state.grandparent_index_),
160
+ overlapped_bytes_(state.overlapped_bytes_),
161
+ seen_key_(state.seen_key_),
162
+ compaction_outputs_(std::move(state.compaction_outputs_)),
163
+ penultimate_level_outputs_(std::move(state.penultimate_level_outputs_)),
164
+ is_current_penultimate_level_(state.is_current_penultimate_level_),
165
+ has_penultimate_level_outputs_(state.has_penultimate_level_outputs_) {
166
+ current_outputs_ = is_current_penultimate_level_
167
+ ? &penultimate_level_outputs_
168
+ : &compaction_outputs_;
169
+ }
170
+
171
+ bool HasPenultimateLevelOutputs() const {
172
+ return has_penultimate_level_outputs_ ||
173
+ penultimate_level_outputs_.HasRangeDel();
174
+ }
175
+
176
+ void FillFilesToCutForTtl();
177
+
178
+ // Returns true iff we should stop building the current output
179
+ // before processing "internal_key".
180
+ bool ShouldStopBefore(const Slice& internal_key);
181
+
182
+ bool IsCurrentPenultimateLevel() const {
183
+ return is_current_penultimate_level_;
184
+ }
185
+
186
+ // Add all the new files from this compaction to version_edit
187
+ void AddOutputsEdit(VersionEdit* out_edit) const {
188
+ for (const auto& file : penultimate_level_outputs_.outputs_) {
189
+ out_edit->AddFile(compaction->GetPenultimateLevel(), file.meta);
190
+ }
191
+ for (const auto& file : compaction_outputs_.outputs_) {
192
+ out_edit->AddFile(compaction->output_level(), file.meta);
193
+ }
194
+ }
195
+
196
+ void Cleanup(Cache* cache);
197
+
198
+ void AggregateCompactionStats(
199
+ InternalStats::CompactionStatsFull& compaction_stats) const;
200
+
201
+ CompactionOutputs& Current() const {
202
+ assert(current_outputs_);
203
+ return *current_outputs_;
204
+ }
205
+
206
+ // Add compaction_iterator key/value to the `Current` output group.
207
+ Status AddToOutput(const CompactionIterator& iter,
208
+ const CompactionFileOpenFunc& open_file_func,
209
+ const CompactionFileCloseFunc& close_file_func);
210
+
211
+ // Close all compaction output files, both output_to_penultimate_level outputs
212
+ // and normal outputs.
213
+ Status CloseCompactionFiles(const Status& curr_status,
214
+ const CompactionFileOpenFunc& open_file_func,
215
+ const CompactionFileCloseFunc& close_file_func) {
216
+ // Call FinishCompactionOutputFile() even if status is not ok: it needs to
217
+ // close the output file.
218
+ Status s = penultimate_level_outputs_.CloseOutput(
219
+ curr_status, open_file_func, close_file_func);
220
+ s = compaction_outputs_.CloseOutput(s, open_file_func, close_file_func);
221
+ return s;
222
+ }
223
+
224
+ private:
225
+ // Some identified files with old oldest ancester time and the range should be
226
+ // isolated out so that the output file(s) in that range can be merged down
227
+ // for TTL and clear the timestamps for the range.
228
+ std::vector<FileMetaData*> files_to_cut_for_ttl_;
229
+ int cur_files_to_cut_for_ttl_ = -1;
230
+ int next_files_to_cut_for_ttl_ = 0;
231
+
232
+ // An index that used to speed up ShouldStopBefore().
233
+ size_t grandparent_index_ = 0;
234
+ // The number of bytes overlapping between the current output and
235
+ // grandparent files used in ShouldStopBefore().
236
+ uint64_t overlapped_bytes_ = 0;
237
+ // A flag determines whether the key has been seen in ShouldStopBefore()
238
+ bool seen_key_ = false;
239
+
240
+ // A flag determines if this subcompaction has been split by the cursor
241
+ bool is_split_ = false;
242
+
243
+ // We also maintain the output split key for each subcompaction to avoid
244
+ // repetitive comparison in ShouldStopBefore()
245
+ const InternalKey* local_output_split_key_ = nullptr;
246
+
247
+ // State kept for output being generated
248
+ CompactionOutputs compaction_outputs_;
249
+ CompactionOutputs penultimate_level_outputs_;
250
+ CompactionOutputs* current_outputs_ = &compaction_outputs_;
251
+ bool is_current_penultimate_level_ = false;
252
+ bool has_penultimate_level_outputs_ = false;
253
+ };
254
+
255
+ } // namespace ROCKSDB_NAMESPACE