@nxtedition/rocksdb 7.0.26 → 7.0.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/binding.cc +67 -25
  2. package/chained-batch.js +1 -1
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +3 -0
  4. package/deps/rocksdb/rocksdb/Makefile +3 -0
  5. package/deps/rocksdb/rocksdb/TARGETS +10 -0
  6. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +17 -7
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +2 -0
  8. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +1 -0
  9. package/deps/rocksdb/rocksdb/cache/charged_cache.cc +117 -0
  10. package/deps/rocksdb/rocksdb/cache/charged_cache.h +121 -0
  11. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +270 -180
  12. package/deps/rocksdb/rocksdb/cache/clock_cache.h +412 -124
  13. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +1 -0
  14. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +1 -1
  15. package/deps/rocksdb/rocksdb/cache/lru_cache.h +2 -2
  16. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +2 -2
  17. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +1 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +71 -9
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +11 -2
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +21 -14
  21. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +68 -7
  22. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +16 -0
  23. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +519 -12
  24. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +120 -0
  25. package/deps/rocksdb/rocksdb/db/builder.cc +15 -5
  26. package/deps/rocksdb/rocksdb/db/builder.h +3 -0
  27. package/deps/rocksdb/rocksdb/db/c.cc +18 -0
  28. package/deps/rocksdb/rocksdb/db/c_test.c +18 -0
  29. package/deps/rocksdb/rocksdb/db/column_family.h +2 -0
  30. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +3 -2
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +9 -4
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +15 -10
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +36 -34
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +50 -13
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +12 -0
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +8 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +2 -1
  38. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +13 -17
  39. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +26 -9
  40. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +0 -11
  41. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +93 -0
  42. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +16 -1
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +3 -8
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +8 -1
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +17 -5
  46. package/deps/rocksdb/rocksdb/db/db_test.cc +0 -3
  47. package/deps/rocksdb/rocksdb/db/db_test2.cc +39 -12
  48. package/deps/rocksdb/rocksdb/db/db_test_util.cc +9 -0
  49. package/deps/rocksdb/rocksdb/db/db_test_util.h +2 -0
  50. package/deps/rocksdb/rocksdb/db/dbformat.cc +0 -38
  51. package/deps/rocksdb/rocksdb/db/dbformat.h +14 -13
  52. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +5 -2
  53. package/deps/rocksdb/rocksdb/db/event_helpers.cc +13 -1
  54. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +0 -10
  55. package/deps/rocksdb/rocksdb/db/flush_job.cc +19 -15
  56. package/deps/rocksdb/rocksdb/db/flush_job.h +7 -0
  57. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +21 -15
  58. package/deps/rocksdb/rocksdb/db/forward_iterator.h +4 -3
  59. package/deps/rocksdb/rocksdb/db/memtable_list.cc +9 -0
  60. package/deps/rocksdb/rocksdb/db/memtable_list.h +5 -0
  61. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +53 -12
  62. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +14 -2
  63. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler_test.cc +10 -10
  64. package/deps/rocksdb/rocksdb/db/repair.cc +8 -6
  65. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +890 -0
  66. package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.cc +324 -0
  67. package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.h +186 -0
  68. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +2 -0
  69. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +13 -4
  70. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +23 -2
  71. package/deps/rocksdb/rocksdb/env/env_test.cc +74 -1
  72. package/deps/rocksdb/rocksdb/env/io_posix.cc +11 -8
  73. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +28 -0
  74. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +14 -1
  75. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +4 -4
  76. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +30 -23
  77. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +1 -1
  78. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +3 -13
  79. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +5 -0
  80. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/debug.h +1 -2
  81. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +1 -0
  82. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  83. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +26 -26
  84. package/deps/rocksdb/rocksdb/options/cf_options.cc +14 -1
  85. package/deps/rocksdb/rocksdb/options/cf_options.h +5 -0
  86. package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -56
  87. package/deps/rocksdb/rocksdb/options/db_options.cc +4 -5
  88. package/deps/rocksdb/rocksdb/options/options.cc +11 -1
  89. package/deps/rocksdb/rocksdb/options/options_helper.cc +8 -0
  90. package/deps/rocksdb/rocksdb/options/options_helper.h +4 -0
  91. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +4 -0
  92. package/deps/rocksdb/rocksdb/options/options_test.cc +4 -0
  93. package/deps/rocksdb/rocksdb/src.mk +3 -0
  94. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +6 -1
  95. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +4 -0
  96. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +36 -3
  97. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +36 -1
  98. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +14 -3
  99. package/deps/rocksdb/rocksdb/table/internal_iterator.h +1 -1
  100. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +6 -0
  101. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +5 -0
  102. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +3 -0
  103. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +10 -7
  104. package/deps/rocksdb/rocksdb/table/table_builder.h +7 -3
  105. package/deps/rocksdb/rocksdb/table/table_properties.cc +9 -0
  106. package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +3 -2
  107. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +58 -30
  108. package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +1 -0
  109. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +20 -0
  110. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +29 -154
  111. package/deps/rocksdb/rocksdb/util/rate_limiter.h +16 -34
  112. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +0 -92
  113. package/deps/rocksdb/rocksdb/util/timer.h +6 -0
  114. package/deps/rocksdb/rocksdb/util/vector_iterator.h +4 -3
  115. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -45
  116. package/deps/rocksdb/rocksdb/utilities/debug.cc +40 -0
  117. package/deps/rocksdb/rocksdb.gyp +2 -0
  118. package/index.js +4 -0
  119. package/package.json +1 -1
  120. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  121. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -0,0 +1,324 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ //
3
+ // This source code is licensed under both the GPLv2 (found in the
4
+ // COPYING file in the root directory) and Apache 2.0 License
5
+ // (found in the LICENSE.Apache file in the root directory).
6
+
7
+ #include "db/seqno_to_time_mapping.h"
8
+
9
+ #include "db/version_edit.h"
10
+ #include "util/string_util.h"
11
+
12
+ namespace ROCKSDB_NAMESPACE {
13
+
14
+ uint64_t SeqnoToTimeMapping::GetOldestApproximateTime(
15
+ const SequenceNumber seqno) const {
16
+ assert(is_sorted_);
17
+ auto it = std::upper_bound(seqno_time_mapping_.begin(),
18
+ seqno_time_mapping_.end(), seqno);
19
+ if (it == seqno_time_mapping_.begin()) {
20
+ return 0;
21
+ }
22
+ it--;
23
+ return it->time;
24
+ }
25
+
26
+ void SeqnoToTimeMapping::Add(SequenceNumber seqno, uint64_t time) {
27
+ if (seqno == 0) {
28
+ return;
29
+ }
30
+ is_sorted_ = false;
31
+ seqno_time_mapping_.emplace_back(seqno, time);
32
+ }
33
+
34
+ SequenceNumber SeqnoToTimeMapping::TruncateOldEntries(const uint64_t now) {
35
+ assert(is_sorted_);
36
+
37
+ if (max_time_duration_ == 0) {
38
+ return 0;
39
+ }
40
+
41
+ const uint64_t cut_off_time =
42
+ now > max_time_duration_ ? now - max_time_duration_ : 0;
43
+ assert(cut_off_time <= now); // no overflow
44
+
45
+ auto it = std::upper_bound(
46
+ seqno_time_mapping_.begin(), seqno_time_mapping_.end(), cut_off_time,
47
+ [](uint64_t target, const SeqnoTimePair& other) -> bool {
48
+ return target < other.time;
49
+ });
50
+ if (it == seqno_time_mapping_.begin()) {
51
+ return 0;
52
+ }
53
+ it--;
54
+ seqno_time_mapping_.erase(seqno_time_mapping_.begin(), it);
55
+
56
+ return seqno_time_mapping_.front().seqno;
57
+ }
58
+
59
+ // The encoded format is:
60
+ // [num_of_entries][[seqno][time],[seqno][time],...]
61
+ // ^ ^
62
+ // var_int delta_encoded (var_int)
63
+ void SeqnoToTimeMapping::Encode(std::string& dest, const SequenceNumber start,
64
+ const SequenceNumber end, const uint64_t now,
65
+ const uint64_t output_size) const {
66
+ assert(is_sorted_);
67
+ if (start > end) {
68
+ // It could happen when the SST file is empty, the initial value of min
69
+ // sequence number is kMaxSequenceNumber and max is 0.
70
+ // The empty output file will be removed in the final step of compaction.
71
+ return;
72
+ }
73
+
74
+ auto start_it = std::upper_bound(seqno_time_mapping_.begin(),
75
+ seqno_time_mapping_.end(), start);
76
+ if (start_it != seqno_time_mapping_.begin()) {
77
+ start_it--;
78
+ }
79
+
80
+ auto end_it = std::upper_bound(seqno_time_mapping_.begin(),
81
+ seqno_time_mapping_.end(), end);
82
+ if (end_it == seqno_time_mapping_.begin()) {
83
+ return;
84
+ }
85
+ if (start_it >= end_it) {
86
+ return;
87
+ }
88
+
89
+ // truncate old entries that are not needed
90
+ if (max_time_duration_ > 0) {
91
+ const uint64_t cut_off_time =
92
+ now > max_time_duration_ ? now - max_time_duration_ : 0;
93
+ while (start_it < end_it && start_it->time < cut_off_time) {
94
+ start_it++;
95
+ }
96
+ }
97
+
98
+ // If there are more data than needed, pick the entries for encoding.
99
+ // It's not the most optimized algorithm for selecting the best representative
100
+ // entries over the time.
101
+ // It starts from the beginning and makes sure the distance is larger than
102
+ // `(end - start) / size` before selecting the number. For example, for the
103
+ // following list, pick 3 entries (it will pick seqno #1, #6, #8):
104
+ // 1 -> 10
105
+ // 5 -> 17
106
+ // 6 -> 25
107
+ // 8 -> 30
108
+ // first, it always picks the first one, then there are 2 num_entries_to_fill
109
+ // and the time difference between current one vs. the last one is
110
+ // (30 - 10) = 20. 20/2 = 10. So it will skip until 10+10 = 20. => it skips
111
+ // #5 and pick #6.
112
+ // But the most optimized solution is picking #1 #5 #8, as it will be more
113
+ // evenly distributed for time. Anyway the following algorithm is simple and
114
+ // may over-select new data, which is good. We do want more accurate time
115
+ // information for recent data.
116
+ std::deque<SeqnoTimePair> output_copy;
117
+ if (std::distance(start_it, end_it) > static_cast<int64_t>(output_size)) {
118
+ int64_t num_entries_to_fill = static_cast<int64_t>(output_size);
119
+ auto last_it = end_it;
120
+ last_it--;
121
+ uint64_t end_time = last_it->time;
122
+ uint64_t skip_until_time = 0;
123
+ for (auto it = start_it; it < end_it; it++) {
124
+ // skip if it's not reach the skip_until_time yet
125
+ if (std::distance(it, end_it) > num_entries_to_fill &&
126
+ it->time < skip_until_time) {
127
+ continue;
128
+ }
129
+ output_copy.push_back(*it);
130
+ num_entries_to_fill--;
131
+ if (std::distance(it, end_it) > num_entries_to_fill &&
132
+ num_entries_to_fill > 0) {
133
+ // If there are more entries than we need, re-calculate the
134
+ // skip_until_time, which means skip until that time
135
+ skip_until_time =
136
+ it->time + ((end_time - it->time) / num_entries_to_fill);
137
+ }
138
+ }
139
+
140
+ // Make sure all entries are filled
141
+ assert(num_entries_to_fill == 0);
142
+ start_it = output_copy.begin();
143
+ end_it = output_copy.end();
144
+ }
145
+
146
+ // Delta encode the data
147
+ uint64_t size = std::distance(start_it, end_it);
148
+ PutVarint64(&dest, size);
149
+ SeqnoTimePair base;
150
+ for (auto it = start_it; it < end_it; it++) {
151
+ assert(base < *it);
152
+ SeqnoTimePair val = *it - base;
153
+ base = *it;
154
+ val.Encode(dest);
155
+ }
156
+ }
157
+
158
+ Status SeqnoToTimeMapping::Add(const std::string& seqno_time_mapping_str) {
159
+ Slice input(seqno_time_mapping_str);
160
+ if (input.empty()) {
161
+ return Status::OK();
162
+ }
163
+ uint64_t size;
164
+ if (!GetVarint64(&input, &size)) {
165
+ return Status::Corruption("Invalid sequence number time size");
166
+ }
167
+ is_sorted_ = false;
168
+ SeqnoTimePair base;
169
+ for (uint64_t i = 0; i < size; i++) {
170
+ SeqnoTimePair val;
171
+ Status s = val.Decode(input);
172
+ if (!s.ok()) {
173
+ return s;
174
+ }
175
+ val.Add(base);
176
+ seqno_time_mapping_.emplace_back(val);
177
+ base = val;
178
+ }
179
+ return Status::OK();
180
+ }
181
+
182
+ void SeqnoToTimeMapping::SeqnoTimePair::Encode(std::string& dest) const {
183
+ PutVarint64Varint64(&dest, seqno, time);
184
+ }
185
+
186
+ Status SeqnoToTimeMapping::SeqnoTimePair::Decode(Slice& input) {
187
+ if (!GetVarint64(&input, &seqno)) {
188
+ return Status::Corruption("Invalid sequence number");
189
+ }
190
+ if (!GetVarint64(&input, &time)) {
191
+ return Status::Corruption("Invalid time");
192
+ }
193
+ return Status::OK();
194
+ }
195
+
196
+ bool SeqnoToTimeMapping::Append(SequenceNumber seqno, uint64_t time) {
197
+ assert(is_sorted_);
198
+
199
+ // skip seq number 0, which may have special meaning, like zeroed out data
200
+ if (seqno == 0) {
201
+ return false;
202
+ }
203
+ if (!Empty()) {
204
+ if (seqno < Last().seqno || time < Last().time) {
205
+ return false;
206
+ }
207
+ if (seqno == Last().seqno) {
208
+ Last().time = time;
209
+ return true;
210
+ }
211
+ if (time == Last().time) {
212
+ // new sequence has the same time as old one, no need to add new mapping
213
+ return false;
214
+ }
215
+ }
216
+
217
+ seqno_time_mapping_.emplace_back(seqno, time);
218
+
219
+ if (seqno_time_mapping_.size() > max_capacity_) {
220
+ seqno_time_mapping_.pop_front();
221
+ }
222
+ return true;
223
+ }
224
+
225
+ bool SeqnoToTimeMapping::Resize(uint64_t min_time_duration,
226
+ uint64_t max_time_duration) {
227
+ uint64_t new_max_capacity =
228
+ CalculateMaxCapacity(min_time_duration, max_time_duration);
229
+ if (new_max_capacity == max_capacity_) {
230
+ return false;
231
+ } else if (new_max_capacity < seqno_time_mapping_.size()) {
232
+ uint64_t delta = seqno_time_mapping_.size() - new_max_capacity;
233
+ seqno_time_mapping_.erase(seqno_time_mapping_.begin(),
234
+ seqno_time_mapping_.begin() + delta);
235
+ }
236
+ max_capacity_ = new_max_capacity;
237
+ return true;
238
+ }
239
+
240
+ Status SeqnoToTimeMapping::Sort() {
241
+ if (is_sorted_) {
242
+ return Status::OK();
243
+ }
244
+ if (seqno_time_mapping_.empty()) {
245
+ is_sorted_ = true;
246
+ return Status::OK();
247
+ }
248
+
249
+ std::deque<SeqnoTimePair> copy = std::move(seqno_time_mapping_);
250
+
251
+ std::sort(copy.begin(), copy.end());
252
+
253
+ seqno_time_mapping_.clear();
254
+
255
+ // remove seqno = 0, which may have special meaning, like zeroed out data
256
+ while (copy.front().seqno == 0) {
257
+ copy.pop_front();
258
+ }
259
+
260
+ SeqnoTimePair prev = copy.front();
261
+ for (const auto& it : copy) {
262
+ // If sequence number is the same, pick the one with larger time, which is
263
+ // more accurate than the older time.
264
+ if (it.seqno == prev.seqno) {
265
+ assert(it.time >= prev.time);
266
+ prev.time = it.time;
267
+ } else {
268
+ assert(it.seqno > prev.seqno);
269
+ // If a larger sequence number has an older time which is not useful, skip
270
+ if (it.time > prev.time) {
271
+ seqno_time_mapping_.push_back(prev);
272
+ prev = it;
273
+ }
274
+ }
275
+ }
276
+ seqno_time_mapping_.emplace_back(prev);
277
+
278
+ is_sorted_ = true;
279
+ return Status::OK();
280
+ }
281
+
282
+ std::string SeqnoToTimeMapping::ToHumanString() const {
283
+ std::string ret;
284
+ for (const auto& seq_time : seqno_time_mapping_) {
285
+ AppendNumberTo(&ret, seq_time.seqno);
286
+ ret.append("->");
287
+ AppendNumberTo(&ret, seq_time.time);
288
+ ret.append(",");
289
+ }
290
+ return ret;
291
+ }
292
+
293
+ SeqnoToTimeMapping SeqnoToTimeMapping::Copy(
294
+ SequenceNumber smallest_seqno) const {
295
+ SeqnoToTimeMapping ret;
296
+ auto it = std::upper_bound(seqno_time_mapping_.begin(),
297
+ seqno_time_mapping_.end(), smallest_seqno);
298
+ if (it != seqno_time_mapping_.begin()) {
299
+ it--;
300
+ }
301
+ std::copy(it, seqno_time_mapping_.end(),
302
+ std::back_inserter(ret.seqno_time_mapping_));
303
+ return ret;
304
+ }
305
+
306
+ uint64_t SeqnoToTimeMapping::CalculateMaxCapacity(uint64_t min_time_duration,
307
+ uint64_t max_time_duration) {
308
+ if (min_time_duration == 0) {
309
+ return 0;
310
+ }
311
+ return std::min(
312
+ kMaxSeqnoToTimeEntries,
313
+ max_time_duration * kMaxSeqnoTimePairsPerCF / min_time_duration);
314
+ }
315
+
316
+ SeqnoToTimeMapping::SeqnoTimePair SeqnoToTimeMapping::SeqnoTimePair::operator-(
317
+ const SeqnoTimePair& other) const {
318
+ SeqnoTimePair res;
319
+ res.seqno = seqno - other.seqno;
320
+ res.time = time - other.time;
321
+ return res;
322
+ }
323
+
324
+ } // namespace ROCKSDB_NAMESPACE
@@ -0,0 +1,186 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ //
3
+ // This source code is licensed under both the GPLv2 (found in the
4
+ // COPYING file in the root directory) and Apache 2.0 License
5
+ // (found in the LICENSE.Apache file in the root directory).
6
+
7
+ #pragma once
8
+
9
+ #include <algorithm>
10
+ #include <cinttypes>
11
+ #include <deque>
12
+ #include <functional>
13
+ #include <iterator>
14
+ #include <string>
15
+
16
+ #include "rocksdb/status.h"
17
+ #include "rocksdb/types.h"
18
+
19
+ namespace ROCKSDB_NAMESPACE {
20
+
21
+ constexpr uint64_t kUnknownSeqnoTime = 0;
22
+
23
+ // SeqnoToTimeMapping stores the sequence number to time mapping, so given a
24
+ // sequence number it can estimate the oldest possible time for that sequence
25
+ // number. For example:
26
+ // 10 -> 100
27
+ // 50 -> 300
28
+ // then if a key has seqno 19, the OldestApproximateTime would be 100, for 51 it
29
+ // would be 300.
30
+ // As it's a sorted list, the new entry is inserted from the back. The old data
31
+ // will be popped from the front if they're no longer used.
32
+ //
33
+ // Note: the data struct is not thread safe, both read and write need to be
34
+ // synchronized by caller.
35
+ class SeqnoToTimeMapping {
36
+ public:
37
+ // Maximum number of entries can be encoded into SST. The data is delta encode
38
+ // so the maximum data usage for each SST is < 0.3K
39
+ static constexpr uint64_t kMaxSeqnoTimePairsPerSST = 100;
40
+
41
+ // Maximum number of entries per CF. If there's only CF with this feature on,
42
+ // the max duration divided by this number, so for example, if
43
+ // preclude_last_level_data_seconds = 100000 (~1day), then it will sample the
44
+ // seqno -> time every 1000 seconds (~17minutes). Then the maximum entry it
45
+ // needs is 100.
46
+ // When there are multiple CFs having this feature on, the sampling cadence is
47
+ // determined by the smallest setting, the capacity is determined the largest
48
+ // setting, also it's caped by kMaxSeqnoTimePairsPerCF * 10.
49
+ static constexpr uint64_t kMaxSeqnoTimePairsPerCF = 100;
50
+
51
+ // A simple struct for sequence number to time pair
52
+ struct SeqnoTimePair {
53
+ SequenceNumber seqno = 0;
54
+ uint64_t time = 0;
55
+
56
+ SeqnoTimePair() = default;
57
+ SeqnoTimePair(SequenceNumber _seqno, uint64_t _time)
58
+ : seqno(_seqno), time(_time) {}
59
+
60
+ // Encode to dest string
61
+ void Encode(std::string& dest) const;
62
+
63
+ // Decode the value from input Slice and remove it from the input
64
+ Status Decode(Slice& input);
65
+
66
+ // subtraction of 2 SeqnoTimePair
67
+ SeqnoTimePair operator-(const SeqnoTimePair& other) const;
68
+
69
+ // Add 2 values together
70
+ void Add(const SeqnoTimePair& obj) {
71
+ seqno += obj.seqno;
72
+ time += obj.time;
73
+ }
74
+
75
+ // Compare SeqnoTimePair with a sequence number, used for binary search a
76
+ // sequence number in a list of SeqnoTimePair
77
+ bool operator<(const SequenceNumber& other) const { return seqno < other; }
78
+
79
+ // Compare 2 SeqnoTimePair
80
+ bool operator<(const SeqnoTimePair& other) const {
81
+ return std::tie(seqno, time) < std::tie(other.seqno, other.time);
82
+ }
83
+
84
+ // Check if 2 SeqnoTimePair is the same
85
+ bool operator==(const SeqnoTimePair& other) const {
86
+ return std::tie(seqno, time) == std::tie(other.seqno, other.time);
87
+ }
88
+ };
89
+
90
+ // constractor of SeqnoToTimeMapping
91
+ // max_time_duration is the maximum time it should track. For example, if
92
+ // preclude_last_level_data_seconds is 1 day, then if an entry is older than 1
93
+ // day, then it can be removed.
94
+ // max_capacity is the maximum number of entry it can hold. For single CF,
95
+ // it's caped at 100 (kMaxSeqnoTimePairsPerCF), otherwise
96
+ // kMaxSeqnoTimePairsPerCF * 10.
97
+ // If it's set to 0, means it won't truncate any old data.
98
+ explicit SeqnoToTimeMapping(uint64_t max_time_duration = 0,
99
+ uint64_t max_capacity = 0)
100
+ : max_time_duration_(max_time_duration), max_capacity_(max_capacity) {}
101
+
102
+ // Append a new entry to the list. The new entry should be newer than the
103
+ // existing ones. It maintains the internal sorted status.
104
+ bool Append(SequenceNumber seqno, uint64_t time);
105
+
106
+ // Given a sequence number, estimate it's oldest time
107
+ uint64_t GetOldestApproximateTime(SequenceNumber seqno) const;
108
+
109
+ // Truncate the old entries based on the current time and max_time_duration_
110
+ SequenceNumber TruncateOldEntries(uint64_t now);
111
+
112
+ // Encode to a binary string
113
+ void Encode(std::string& des, SequenceNumber start, SequenceNumber end,
114
+ uint64_t now,
115
+ uint64_t output_size = kMaxSeqnoTimePairsPerSST) const;
116
+
117
+ // Add a new random entry, unlike Append(), it can be any data, but also makes
118
+ // the list un-sorted.
119
+ void Add(SequenceNumber seqno, uint64_t time);
120
+
121
+ // Decode and add the entries to the current obj. The list will be unsorted
122
+ Status Add(const std::string& seqno_time_mapping_str);
123
+
124
+ // Return the number of entries
125
+ size_t Size() const { return seqno_time_mapping_.size(); }
126
+
127
+ // Reduce the size of internal list
128
+ bool Resize(uint64_t min_time_duration, uint64_t max_time_duration);
129
+
130
+ // Override the max_time_duration_
131
+ void SetMaxTimeDuration(uint64_t max_time_duration) {
132
+ max_time_duration_ = max_time_duration;
133
+ }
134
+
135
+ uint64_t GetCapacity() const { return max_capacity_; }
136
+
137
+ // Sort the list, which also remove the redundant entries, useless entries,
138
+ // which makes sure the seqno is sorted, but also the time
139
+ Status Sort();
140
+
141
+ // copy the current obj from the given smallest_seqno.
142
+ SeqnoToTimeMapping Copy(SequenceNumber smallest_seqno) const;
143
+
144
+ // If the internal list is empty
145
+ bool Empty() const { return seqno_time_mapping_.empty(); }
146
+
147
+ // clear all entries
148
+ void Clear() { seqno_time_mapping_.clear(); }
149
+
150
+ // return the string for user message
151
+ // Note: Not efficient, okay for print
152
+ std::string ToHumanString() const;
153
+
154
+ #ifndef NDEBUG
155
+ const std::deque<SeqnoTimePair>& TEST_GetInternalMapping() const {
156
+ return seqno_time_mapping_;
157
+ }
158
+ #endif
159
+
160
+ private:
161
+ static constexpr uint64_t kMaxSeqnoToTimeEntries =
162
+ kMaxSeqnoTimePairsPerCF * 10;
163
+
164
+ uint64_t max_time_duration_;
165
+ uint64_t max_capacity_;
166
+
167
+ std::deque<SeqnoTimePair> seqno_time_mapping_;
168
+
169
+ bool is_sorted_ = true;
170
+
171
+ static uint64_t CalculateMaxCapacity(uint64_t min_time_duration,
172
+ uint64_t max_time_duration);
173
+
174
+ SeqnoTimePair& Last() {
175
+ assert(!Empty());
176
+ return seqno_time_mapping_.back();
177
+ }
178
+ };
179
+
180
+ // for searching the sequence number from SeqnoToTimeMapping
181
+ inline bool operator<(const SequenceNumber& seqno,
182
+ const SeqnoToTimeMapping::SeqnoTimePair& other) {
183
+ return seqno < other.seqno;
184
+ }
185
+
186
+ } // namespace ROCKSDB_NAMESPACE
@@ -141,6 +141,7 @@ DECLARE_bool(charge_compression_dictionary_building_buffer);
141
141
  DECLARE_bool(charge_filter_construction);
142
142
  DECLARE_bool(charge_table_reader);
143
143
  DECLARE_bool(charge_file_metadata);
144
+ DECLARE_bool(charge_blob_cache);
144
145
  DECLARE_int32(top_level_index_pinning);
145
146
  DECLARE_int32(partition_pinning);
146
147
  DECLARE_int32(unpartitioned_pinning);
@@ -272,6 +273,7 @@ DECLARE_bool(use_blob_cache);
272
273
  DECLARE_bool(use_shared_block_and_blob_cache);
273
274
  DECLARE_uint64(blob_cache_size);
274
275
  DECLARE_int32(blob_cache_numshardbits);
276
+ DECLARE_int32(prepopulate_blob_cache);
275
277
 
276
278
  DECLARE_int32(approximate_size_one_in);
277
279
  DECLARE_bool(sync_fault_injection);
@@ -316,24 +316,29 @@ DEFINE_bool(cache_index_and_filter_blocks, false,
316
316
 
317
317
  DEFINE_bool(charge_compression_dictionary_building_buffer, false,
318
318
  "Setting for "
319
- "CacheEntryRoleOptions::charged of"
319
+ "CacheEntryRoleOptions::charged of "
320
320
  "CacheEntryRole::kCompressionDictionaryBuildingBuffer");
321
321
 
322
322
  DEFINE_bool(charge_filter_construction, false,
323
323
  "Setting for "
324
- "CacheEntryRoleOptions::charged of"
324
+ "CacheEntryRoleOptions::charged of "
325
325
  "CacheEntryRole::kFilterConstruction");
326
326
 
327
327
  DEFINE_bool(charge_table_reader, false,
328
328
  "Setting for "
329
- "CacheEntryRoleOptions::charged of"
329
+ "CacheEntryRoleOptions::charged of "
330
330
  "CacheEntryRole::kBlockBasedTableReader");
331
331
 
332
332
  DEFINE_bool(charge_file_metadata, false,
333
333
  "Setting for "
334
- "CacheEntryRoleOptions::charged of"
334
+ "CacheEntryRoleOptions::charged of "
335
335
  "kFileMetadata");
336
336
 
337
+ DEFINE_bool(charge_blob_cache, false,
338
+ "Setting for "
339
+ "CacheEntryRoleOptions::charged of "
340
+ "kBlobCache");
341
+
337
342
  DEFINE_int32(
338
343
  top_level_index_pinning,
339
344
  static_cast<int32_t>(ROCKSDB_NAMESPACE::PinningTier::kFallback),
@@ -474,6 +479,10 @@ DEFINE_int32(blob_cache_numshardbits, 6,
474
479
  "the block and blob caches are different "
475
480
  "(use_shared_block_and_blob_cache = false).");
476
481
 
482
+ DEFINE_int32(prepopulate_blob_cache, 0,
483
+ "[Integrated BlobDB] Pre-populate hot/warm blobs in blob cache. 0 "
484
+ "to disable and 1 to insert during flush.");
485
+
477
486
  static const bool FLAGS_subcompactions_dummy __attribute__((__unused__)) =
478
487
  RegisterFlagValidator(&FLAGS_subcompactions, &ValidateUint32Range);
479
488
 
@@ -270,6 +270,8 @@ bool StressTest::BuildOptionsTable() {
270
270
  std::vector<std::string>{"0", "1M", "4M"});
271
271
  options_tbl.emplace("blob_file_starting_level",
272
272
  std::vector<std::string>{"0", "1", "2"});
273
+ options_tbl.emplace("prepopulate_blob_cache",
274
+ std::vector<std::string>{"kDisable", "kFlushOnly"});
273
275
  }
274
276
 
275
277
  options_table_ = std::move(options_tbl);
@@ -2401,9 +2403,12 @@ void StressTest::Open(SharedState* shared) {
2401
2403
  fprintf(stdout,
2402
2404
  "Integrated BlobDB: blob cache enabled, block and blob caches "
2403
2405
  "shared: %d, blob cache size %" PRIu64
2404
- ", blob cache num shard bits: %d\n",
2406
+ ", blob cache num shard bits: %d, blob cache prepopulated: %s\n",
2405
2407
  FLAGS_use_shared_block_and_blob_cache, FLAGS_blob_cache_size,
2406
- FLAGS_blob_cache_numshardbits);
2408
+ FLAGS_blob_cache_numshardbits,
2409
+ options_.prepopulate_blob_cache == PrepopulateBlobCache::kFlushOnly
2410
+ ? "flush only"
2411
+ : "disable");
2407
2412
  } else {
2408
2413
  fprintf(stdout, "Integrated BlobDB: blob cache disabled\n");
2409
2414
  }
@@ -2902,6 +2907,11 @@ void InitializeOptionsFromFlags(
2902
2907
  {/*.charged = */ FLAGS_charge_file_metadata
2903
2908
  ? CacheEntryRoleOptions::Decision::kEnabled
2904
2909
  : CacheEntryRoleOptions::Decision::kDisabled}});
2910
+ block_based_options.cache_usage_options.options_overrides.insert(
2911
+ {CacheEntryRole::kBlobCache,
2912
+ {/*.charged = */ FLAGS_charge_blob_cache
2913
+ ? CacheEntryRoleOptions::Decision::kEnabled
2914
+ : CacheEntryRoleOptions::Decision::kDisabled}});
2905
2915
  block_based_options.format_version =
2906
2916
  static_cast<uint32_t>(FLAGS_format_version);
2907
2917
  block_based_options.index_block_restart_interval =
@@ -3043,6 +3053,17 @@ void InitializeOptionsFromFlags(
3043
3053
  exit(1);
3044
3054
  }
3045
3055
  }
3056
+ switch (FLAGS_prepopulate_blob_cache) {
3057
+ case 0:
3058
+ options.prepopulate_blob_cache = PrepopulateBlobCache::kDisable;
3059
+ break;
3060
+ case 1:
3061
+ options.prepopulate_blob_cache = PrepopulateBlobCache::kFlushOnly;
3062
+ break;
3063
+ default:
3064
+ fprintf(stderr, "Unknown prepopulate blob cache mode\n");
3065
+ exit(1);
3066
+ }
3046
3067
  }
3047
3068
 
3048
3069
  options.wal_compression =