@nxtedition/rocksdb 11.0.2 → 11.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +133 -122
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +15 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +11 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +17 -11
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +15 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +155 -0
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +564 -461
- package/deps/rocksdb/rocksdb/db/db_follower_test.cc +8 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +40 -24
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +8 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +19 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +20 -16
- package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +27 -0
- package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +10 -2
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +85 -0
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +55 -2
- package/deps/rocksdb/rocksdb/db/db_test2.cc +231 -0
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_test_util.h +10 -1
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +175 -1
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +64 -0
- package/deps/rocksdb/rocksdb/db/dbformat.h +5 -6
- package/deps/rocksdb/rocksdb/db/dbformat_test.cc +8 -8
- package/deps/rocksdb/rocksdb/db/experimental.cc +3 -2
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +2 -4
- package/deps/rocksdb/rocksdb/db/flush_job.cc +7 -2
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +4 -2
- package/deps/rocksdb/rocksdb/db/listener_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/log_writer.cc +12 -3
- package/deps/rocksdb/rocksdb/db/memtable.cc +83 -23
- package/deps/rocksdb/rocksdb/db/memtable.h +11 -3
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +7 -5
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +21 -0
- package/deps/rocksdb/rocksdb/db/version_builder.cc +462 -33
- package/deps/rocksdb/rocksdb/db/version_builder.h +70 -23
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +95 -207
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +54 -35
- package/deps/rocksdb/rocksdb/db/version_set.cc +13 -11
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +313 -59
- package/deps/rocksdb/rocksdb/db/write_batch.cc +124 -64
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +2 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +4 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +9 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +4 -32
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +7 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +60 -172
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +57 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +23 -15
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +2 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.cc +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +4 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +200 -92
- package/deps/rocksdb/rocksdb/env/file_system.cc +3 -3
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +124 -23
- package/deps/rocksdb/rocksdb/file/delete_scheduler.h +61 -8
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +141 -2
- package/deps/rocksdb/rocksdb/file/file_util.cc +17 -2
- package/deps/rocksdb/rocksdb/file/file_util.h +10 -0
- package/deps/rocksdb/rocksdb/file/filename.cc +11 -3
- package/deps/rocksdb/rocksdb/file/filename.h +2 -1
- package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +18 -0
- package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +27 -4
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +8 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +8 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +34 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +25 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +27 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +12 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +29 -1
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +102 -33
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +46 -3
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +4 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +6 -0
- package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
- package/deps/rocksdb/rocksdb/options/db_options.cc +15 -1
- package/deps/rocksdb/rocksdb/options/db_options.h +2 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +10 -0
- package/deps/rocksdb/rocksdb/options/options_parser.cc +3 -2
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +75 -35
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +4 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +8 -1
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +40 -15
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +98 -17
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +14 -2
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +21 -91
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +13 -21
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +14 -5
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +62 -53
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +60 -38
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +175 -78
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +65 -36
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +25 -15
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +13 -1
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +18 -4
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +4 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +11 -0
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +47 -18
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +1 -2
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +95 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +26 -15
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +62 -19
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +73 -34
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +5 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +10 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +2 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +8 -5
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +7 -4
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +225 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +17 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +5 -2
- package/index.js +5 -17
- package/iterator.js +9 -1
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
|
@@ -52,6 +52,7 @@
|
|
|
52
52
|
#include "port/likely.h"
|
|
53
53
|
#include "port/port.h"
|
|
54
54
|
#include "rocksdb/slice.h"
|
|
55
|
+
#include "test_util/sync_point.h"
|
|
55
56
|
#include "util/coding.h"
|
|
56
57
|
#include "util/random.h"
|
|
57
58
|
|
|
@@ -169,13 +170,20 @@ class InlineSkipList {
|
|
|
169
170
|
// REQUIRES: Valid()
|
|
170
171
|
void Next();
|
|
171
172
|
|
|
173
|
+
[[nodiscard]] Status NextAndValidate(bool allow_data_in_errors);
|
|
174
|
+
|
|
172
175
|
// Advances to the previous position.
|
|
173
176
|
// REQUIRES: Valid()
|
|
174
177
|
void Prev();
|
|
175
178
|
|
|
179
|
+
[[nodiscard]] Status PrevAndValidate(bool allow_data_in_errors);
|
|
180
|
+
|
|
176
181
|
// Advance to the first entry with a key >= target
|
|
177
182
|
void Seek(const char* target);
|
|
178
183
|
|
|
184
|
+
[[nodiscard]] Status SeekAndValidate(const char* target,
|
|
185
|
+
bool allow_data_in_errors);
|
|
186
|
+
|
|
179
187
|
// Retreat to the last entry with a key <= target
|
|
180
188
|
void SeekForPrev(const char* target);
|
|
181
189
|
|
|
@@ -237,21 +245,20 @@ class InlineSkipList {
|
|
|
237
245
|
bool KeyIsAfterNode(const DecodedKey& key, Node* n) const;
|
|
238
246
|
|
|
239
247
|
// Returns the earliest node with a key >= key.
|
|
240
|
-
//
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
//
|
|
244
|
-
|
|
248
|
+
// Returns nullptr if there is no such node.
|
|
249
|
+
// @param out_of_order_node If not null, will validate the order of visited
|
|
250
|
+
// nodes. If a pair of out-of-order nodes n1 and n2 are found, n1 will be
|
|
251
|
+
// returned and *out_of_order_node will be set to n2.
|
|
252
|
+
Node* FindGreaterOrEqual(const char* key, Node** out_of_order_node) const;
|
|
253
|
+
|
|
254
|
+
// Returns the latest node with a key < key.
|
|
255
|
+
// Returns head_ if there is no such node.
|
|
245
256
|
// Fills prev[level] with pointer to previous node at "level" for every
|
|
246
257
|
// level in [0..max_height_-1], if prev is non-null.
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
//
|
|
250
|
-
|
|
251
|
-
// Fills prev[level] with pointer to previous node at "level" for every
|
|
252
|
-
// level in [bottom_level..top_level-1], if prev is non-null.
|
|
253
|
-
Node* FindLessThan(const char* key, Node** prev, Node* root, int top_level,
|
|
254
|
-
int bottom_level) const;
|
|
258
|
+
// @param out_of_order_node If not null, will validate the order of visited
|
|
259
|
+
// nodes. If a pair of out-of-order nodes n1 and n2 are found, n1 will be
|
|
260
|
+
// returned and *out_of_order_node will be set to n2.
|
|
261
|
+
Node* FindLessThan(const char* key, Node** out_of_order_node) const;
|
|
255
262
|
|
|
256
263
|
// Return the last node in the list.
|
|
257
264
|
// Return head_ if list is empty.
|
|
@@ -274,6 +281,8 @@ class InlineSkipList {
|
|
|
274
281
|
// lowest_level (inclusive).
|
|
275
282
|
void RecomputeSpliceLevels(const DecodedKey& key, Splice* splice,
|
|
276
283
|
int recompute_level);
|
|
284
|
+
|
|
285
|
+
static Status Corruption(Node* prev, Node* next, bool allow_data_in_errors);
|
|
277
286
|
};
|
|
278
287
|
|
|
279
288
|
// Implementation details follow
|
|
@@ -392,20 +401,68 @@ inline void InlineSkipList<Comparator>::Iterator::Next() {
|
|
|
392
401
|
node_ = node_->Next(0);
|
|
393
402
|
}
|
|
394
403
|
|
|
404
|
+
template <class Comparator>
|
|
405
|
+
inline Status InlineSkipList<Comparator>::Iterator::NextAndValidate(
|
|
406
|
+
bool allow_data_in_errors) {
|
|
407
|
+
assert(Valid());
|
|
408
|
+
Node* prev_node = node_;
|
|
409
|
+
node_ = node_->Next(0);
|
|
410
|
+
// Verify that keys are increasing.
|
|
411
|
+
if (prev_node != list_->head_ && node_ != nullptr &&
|
|
412
|
+
list_->compare_(prev_node->Key(), node_->Key()) >= 0) {
|
|
413
|
+
Node* node = node_;
|
|
414
|
+
// invalidates the iterator
|
|
415
|
+
node_ = nullptr;
|
|
416
|
+
return Corruption(prev_node, node, allow_data_in_errors);
|
|
417
|
+
}
|
|
418
|
+
return Status::OK();
|
|
419
|
+
}
|
|
420
|
+
|
|
395
421
|
template <class Comparator>
|
|
396
422
|
inline void InlineSkipList<Comparator>::Iterator::Prev() {
|
|
397
423
|
// Instead of using explicit "prev" links, we just search for the
|
|
398
424
|
// last node that falls before key.
|
|
399
425
|
assert(Valid());
|
|
400
|
-
node_ = list_->FindLessThan(node_->Key());
|
|
426
|
+
node_ = list_->FindLessThan(node_->Key(), nullptr);
|
|
401
427
|
if (node_ == list_->head_) {
|
|
402
428
|
node_ = nullptr;
|
|
403
429
|
}
|
|
404
430
|
}
|
|
405
431
|
|
|
432
|
+
template <class Comparator>
|
|
433
|
+
inline Status InlineSkipList<Comparator>::Iterator::PrevAndValidate(
|
|
434
|
+
const bool allow_data_in_errors) {
|
|
435
|
+
assert(Valid());
|
|
436
|
+
// Skip list validation is done in FindLessThan().
|
|
437
|
+
Node* out_of_order_node = nullptr;
|
|
438
|
+
node_ = list_->FindLessThan(node_->Key(), &out_of_order_node);
|
|
439
|
+
if (out_of_order_node) {
|
|
440
|
+
Node* node = node_;
|
|
441
|
+
node_ = nullptr;
|
|
442
|
+
return Corruption(node, out_of_order_node, allow_data_in_errors);
|
|
443
|
+
}
|
|
444
|
+
if (node_ == list_->head_) {
|
|
445
|
+
node_ = nullptr;
|
|
446
|
+
}
|
|
447
|
+
return Status::OK();
|
|
448
|
+
}
|
|
449
|
+
|
|
406
450
|
template <class Comparator>
|
|
407
451
|
inline void InlineSkipList<Comparator>::Iterator::Seek(const char* target) {
|
|
408
|
-
node_ = list_->FindGreaterOrEqual(target);
|
|
452
|
+
node_ = list_->FindGreaterOrEqual(target, nullptr);
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
template <class Comparator>
|
|
456
|
+
inline Status InlineSkipList<Comparator>::Iterator::SeekAndValidate(
|
|
457
|
+
const char* target, const bool allow_data_in_errors) {
|
|
458
|
+
Node* out_of_order_node = nullptr;
|
|
459
|
+
node_ = list_->FindGreaterOrEqual(target, &out_of_order_node);
|
|
460
|
+
if (out_of_order_node) {
|
|
461
|
+
Node* node = node_;
|
|
462
|
+
node_ = nullptr;
|
|
463
|
+
return Corruption(node, out_of_order_node, allow_data_in_errors);
|
|
464
|
+
}
|
|
465
|
+
return Status::OK();
|
|
409
466
|
}
|
|
410
467
|
|
|
411
468
|
template <class Comparator>
|
|
@@ -448,6 +505,7 @@ int InlineSkipList<Comparator>::RandomHeight() {
|
|
|
448
505
|
rnd->Next() < kScaledInverseBranching_) {
|
|
449
506
|
height++;
|
|
450
507
|
}
|
|
508
|
+
TEST_SYNC_POINT_CALLBACK("InlineSkipList::RandomHeight::height", &height);
|
|
451
509
|
assert(height > 0);
|
|
452
510
|
assert(height <= kMaxHeight_);
|
|
453
511
|
assert(height <= kMaxPossibleHeight);
|
|
@@ -472,7 +530,8 @@ bool InlineSkipList<Comparator>::KeyIsAfterNode(const DecodedKey& key,
|
|
|
472
530
|
|
|
473
531
|
template <class Comparator>
|
|
474
532
|
typename InlineSkipList<Comparator>::Node*
|
|
475
|
-
InlineSkipList<Comparator>::FindGreaterOrEqual(
|
|
533
|
+
InlineSkipList<Comparator>::FindGreaterOrEqual(
|
|
534
|
+
const char* key, Node** const out_of_order_node) const {
|
|
476
535
|
// Note: It looks like we could reduce duplication by implementing
|
|
477
536
|
// this function as FindLessThan(key)->Next(0), but we wouldn't be able
|
|
478
537
|
// to exit early on equality and the result wouldn't even be correct.
|
|
@@ -486,6 +545,11 @@ InlineSkipList<Comparator>::FindGreaterOrEqual(const char* key) const {
|
|
|
486
545
|
Node* next = x->Next(level);
|
|
487
546
|
if (next != nullptr) {
|
|
488
547
|
PREFETCH(next->Next(level), 0, 1);
|
|
548
|
+
if (out_of_order_node && x != head_ &&
|
|
549
|
+
compare_(x->Key(), next->Key()) >= 0) {
|
|
550
|
+
*out_of_order_node = next;
|
|
551
|
+
return x;
|
|
552
|
+
}
|
|
489
553
|
}
|
|
490
554
|
// Make sure the lists are sorted
|
|
491
555
|
assert(x == head_ || next == nullptr || KeyIsAfterNode(next->Key(), x));
|
|
@@ -509,18 +573,11 @@ InlineSkipList<Comparator>::FindGreaterOrEqual(const char* key) const {
|
|
|
509
573
|
|
|
510
574
|
template <class Comparator>
|
|
511
575
|
typename InlineSkipList<Comparator>::Node*
|
|
512
|
-
InlineSkipList<Comparator>::FindLessThan(const char* key,
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
typename InlineSkipList<Comparator>::Node*
|
|
518
|
-
InlineSkipList<Comparator>::FindLessThan(const char* key, Node** prev,
|
|
519
|
-
Node* root, int top_level,
|
|
520
|
-
int bottom_level) const {
|
|
521
|
-
assert(top_level > bottom_level);
|
|
522
|
-
int level = top_level - 1;
|
|
523
|
-
Node* x = root;
|
|
576
|
+
InlineSkipList<Comparator>::FindLessThan(const char* key,
|
|
577
|
+
Node** const out_of_order_node) const {
|
|
578
|
+
int level = GetMaxHeight() - 1;
|
|
579
|
+
assert(level >= 0);
|
|
580
|
+
Node* x = head_;
|
|
524
581
|
// KeyIsAfter(key, last_not_after) is definitely false
|
|
525
582
|
Node* last_not_after = nullptr;
|
|
526
583
|
const DecodedKey key_decoded = compare_.decode_key(key);
|
|
@@ -529,6 +586,11 @@ InlineSkipList<Comparator>::FindLessThan(const char* key, Node** prev,
|
|
|
529
586
|
Node* next = x->Next(level);
|
|
530
587
|
if (next != nullptr) {
|
|
531
588
|
PREFETCH(next->Next(level), 0, 1);
|
|
589
|
+
if (out_of_order_node && x != head_ &&
|
|
590
|
+
compare_(x->Key(), next->Key()) >= 0) {
|
|
591
|
+
*out_of_order_node = next;
|
|
592
|
+
return x;
|
|
593
|
+
}
|
|
532
594
|
}
|
|
533
595
|
assert(x == head_ || next == nullptr || KeyIsAfterNode(next->Key(), x));
|
|
534
596
|
assert(x == head_ || KeyIsAfterNode(key_decoded, x));
|
|
@@ -537,10 +599,7 @@ InlineSkipList<Comparator>::FindLessThan(const char* key, Node** prev,
|
|
|
537
599
|
assert(next != nullptr);
|
|
538
600
|
x = next;
|
|
539
601
|
} else {
|
|
540
|
-
if (
|
|
541
|
-
prev[level] = x;
|
|
542
|
-
}
|
|
543
|
-
if (level == bottom_level) {
|
|
602
|
+
if (level == 0) {
|
|
544
603
|
return x;
|
|
545
604
|
} else {
|
|
546
605
|
// Switch to next list, reuse KeyIsAfterNode() result
|
|
@@ -999,7 +1058,7 @@ bool InlineSkipList<Comparator>::Insert(const char* key, Splice* splice,
|
|
|
999
1058
|
|
|
1000
1059
|
template <class Comparator>
|
|
1001
1060
|
bool InlineSkipList<Comparator>::Contains(const char* key) const {
|
|
1002
|
-
Node* x = FindGreaterOrEqual(key);
|
|
1061
|
+
Node* x = FindGreaterOrEqual(key, nullptr);
|
|
1003
1062
|
if (x != nullptr && Equal(key, x->Key())) {
|
|
1004
1063
|
return true;
|
|
1005
1064
|
} else {
|
|
@@ -1048,4 +1107,14 @@ void InlineSkipList<Comparator>::TEST_Validate() const {
|
|
|
1048
1107
|
}
|
|
1049
1108
|
}
|
|
1050
1109
|
|
|
1110
|
+
template <class Comparator>
|
|
1111
|
+
Status InlineSkipList<Comparator>::Corruption(Node* prev, Node* next,
|
|
1112
|
+
bool allow_data_in_errors) {
|
|
1113
|
+
std::string msg = "Out-of-order keys found in skiplist.";
|
|
1114
|
+
if (allow_data_in_errors) {
|
|
1115
|
+
msg.append(" prev key: " + Slice(prev->Key()).ToString(true));
|
|
1116
|
+
msg.append(" next key: " + Slice(next->Key()).ToString(true));
|
|
1117
|
+
}
|
|
1118
|
+
return Status::Corruption(msg);
|
|
1119
|
+
}
|
|
1051
1120
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -92,6 +92,20 @@ class SkipListRep : public MemTableRep {
|
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
94
|
|
|
95
|
+
Status GetAndValidate(const LookupKey& k, void* callback_args,
|
|
96
|
+
bool (*callback_func)(void* arg, const char* entry),
|
|
97
|
+
bool allow_data_in_errors) override {
|
|
98
|
+
SkipListRep::Iterator iter(&skip_list_);
|
|
99
|
+
Slice dummy_slice;
|
|
100
|
+
Status status = iter.SeekAndValidate(dummy_slice, k.memtable_key().data(),
|
|
101
|
+
allow_data_in_errors);
|
|
102
|
+
for (; iter.Valid() && status.ok() &&
|
|
103
|
+
callback_func(callback_args, iter.key());
|
|
104
|
+
status = iter.NextAndValidate(allow_data_in_errors)) {
|
|
105
|
+
}
|
|
106
|
+
return status;
|
|
107
|
+
}
|
|
108
|
+
|
|
95
109
|
uint64_t ApproximateNumEntries(const Slice& start_ikey,
|
|
96
110
|
const Slice& end_ikey) override {
|
|
97
111
|
std::string tmp;
|
|
@@ -181,15 +195,24 @@ class SkipListRep : public MemTableRep {
|
|
|
181
195
|
|
|
182
196
|
// Returns the key at the current position.
|
|
183
197
|
// REQUIRES: Valid()
|
|
184
|
-
const char* key() const override {
|
|
198
|
+
const char* key() const override {
|
|
199
|
+
assert(Valid());
|
|
200
|
+
return iter_.key();
|
|
201
|
+
}
|
|
185
202
|
|
|
186
203
|
// Advances to the next position.
|
|
187
204
|
// REQUIRES: Valid()
|
|
188
|
-
void Next() override {
|
|
205
|
+
void Next() override {
|
|
206
|
+
assert(Valid());
|
|
207
|
+
iter_.Next();
|
|
208
|
+
}
|
|
189
209
|
|
|
190
210
|
// Advances to the previous position.
|
|
191
211
|
// REQUIRES: Valid()
|
|
192
|
-
void Prev() override {
|
|
212
|
+
void Prev() override {
|
|
213
|
+
assert(Valid());
|
|
214
|
+
iter_.Prev();
|
|
215
|
+
}
|
|
193
216
|
|
|
194
217
|
// Advance to the first entry with a key >= target
|
|
195
218
|
void Seek(const Slice& user_key, const char* memtable_key) override {
|
|
@@ -219,6 +242,26 @@ class SkipListRep : public MemTableRep {
|
|
|
219
242
|
// Final state of iterator is Valid() iff list is not empty.
|
|
220
243
|
void SeekToLast() override { iter_.SeekToLast(); }
|
|
221
244
|
|
|
245
|
+
Status NextAndValidate(bool allow_data_in_errors) override {
|
|
246
|
+
assert(Valid());
|
|
247
|
+
return iter_.NextAndValidate(allow_data_in_errors);
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
Status SeekAndValidate(const Slice& user_key, const char* memtable_key,
|
|
251
|
+
bool allow_data_in_errors) override {
|
|
252
|
+
if (memtable_key != nullptr) {
|
|
253
|
+
return iter_.SeekAndValidate(memtable_key, allow_data_in_errors);
|
|
254
|
+
} else {
|
|
255
|
+
return iter_.SeekAndValidate(EncodeKey(&tmp_, user_key),
|
|
256
|
+
allow_data_in_errors);
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
Status PrevAndValidate(bool allow_data_in_error) override {
|
|
261
|
+
assert(Valid());
|
|
262
|
+
return iter_.PrevAndValidate(allow_data_in_error);
|
|
263
|
+
}
|
|
264
|
+
|
|
222
265
|
protected:
|
|
223
266
|
std::string tmp_; // For passing to EncodeKey
|
|
224
267
|
};
|
|
@@ -266,6 +266,10 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
|
|
|
266
266
|
{PREFETCH_BYTES_USEFUL, "rocksdb.prefetch.bytes.useful"},
|
|
267
267
|
{PREFETCH_HITS, "rocksdb.prefetch.hits"},
|
|
268
268
|
{SST_FOOTER_CORRUPTION_COUNT, "rocksdb.footer.corruption.count"},
|
|
269
|
+
{FILE_READ_CORRUPTION_RETRY_COUNT,
|
|
270
|
+
"rocksdb.file.read.corruption.retry.count"},
|
|
271
|
+
{FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT,
|
|
272
|
+
"rocksdb.file.read.corruption.retry.success.count"},
|
|
269
273
|
};
|
|
270
274
|
|
|
271
275
|
const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
|
|
@@ -531,6 +531,10 @@ static std::unordered_map<std::string, OptionTypeInfo>
|
|
|
531
531
|
{offsetof(struct MutableCFOptions, block_protection_bytes_per_key),
|
|
532
532
|
OptionType::kUInt8T, OptionVerificationType::kNormal,
|
|
533
533
|
OptionTypeFlags::kMutable}},
|
|
534
|
+
{"paranoid_memory_checks",
|
|
535
|
+
{offsetof(struct MutableCFOptions, paranoid_memory_checks),
|
|
536
|
+
OptionType::kBoolean, OptionVerificationType::kNormal,
|
|
537
|
+
OptionTypeFlags::kMutable}},
|
|
534
538
|
{kOptNameCompOpts,
|
|
535
539
|
OptionTypeInfo::Struct(
|
|
536
540
|
kOptNameCompOpts, &compression_options_type_info,
|
|
@@ -1104,6 +1108,8 @@ void MutableCFOptions::Dump(Logger* log) const {
|
|
|
1104
1108
|
ttl);
|
|
1105
1109
|
ROCKS_LOG_INFO(log, " periodic_compaction_seconds: %" PRIu64,
|
|
1106
1110
|
periodic_compaction_seconds);
|
|
1111
|
+
ROCKS_LOG_INFO(log, " paranoid_memory_checks: %d",
|
|
1112
|
+
paranoid_memory_checks);
|
|
1107
1113
|
std::string result;
|
|
1108
1114
|
char buf[10];
|
|
1109
1115
|
for (const auto m : max_bytes_for_level_multiplier_additional) {
|
|
@@ -168,6 +168,7 @@ struct MutableCFOptions {
|
|
|
168
168
|
memtable_protection_bytes_per_key(
|
|
169
169
|
options.memtable_protection_bytes_per_key),
|
|
170
170
|
block_protection_bytes_per_key(options.block_protection_bytes_per_key),
|
|
171
|
+
paranoid_memory_checks(options.paranoid_memory_checks),
|
|
171
172
|
sample_for_compression(
|
|
172
173
|
options.sample_for_compression), // TODO: is 0 fine here?
|
|
173
174
|
compression_per_level(options.compression_per_level),
|
|
@@ -317,6 +318,7 @@ struct MutableCFOptions {
|
|
|
317
318
|
Temperature default_write_temperature;
|
|
318
319
|
uint32_t memtable_protection_bytes_per_key;
|
|
319
320
|
uint8_t block_protection_bytes_per_key;
|
|
321
|
+
bool paranoid_memory_checks;
|
|
320
322
|
|
|
321
323
|
uint64_t sample_for_compression;
|
|
322
324
|
std::vector<CompressionType> compression_per_level;
|
|
@@ -576,6 +576,14 @@ static std::unordered_map<std::string, OptionTypeInfo>
|
|
|
576
576
|
{offsetof(struct ImmutableDBOptions, follower_catchup_retry_wait_ms),
|
|
577
577
|
OptionType::kUInt64T, OptionVerificationType::kNormal,
|
|
578
578
|
OptionTypeFlags::kNone}},
|
|
579
|
+
{"metadata_write_temperature",
|
|
580
|
+
{offsetof(struct ImmutableDBOptions, metadata_write_temperature),
|
|
581
|
+
OptionType::kTemperature, OptionVerificationType::kNormal,
|
|
582
|
+
OptionTypeFlags::kNone}},
|
|
583
|
+
{"wal_write_temperature",
|
|
584
|
+
{offsetof(struct ImmutableDBOptions, wal_write_temperature),
|
|
585
|
+
OptionType::kTemperature, OptionVerificationType::kNormal,
|
|
586
|
+
OptionTypeFlags::kNone}},
|
|
579
587
|
};
|
|
580
588
|
|
|
581
589
|
const std::string OptionsHelper::kDBOptionsName = "DBOptions";
|
|
@@ -778,7 +786,9 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options)
|
|
|
778
786
|
follower_refresh_catchup_period_ms(
|
|
779
787
|
options.follower_refresh_catchup_period_ms),
|
|
780
788
|
follower_catchup_retry_count(options.follower_catchup_retry_count),
|
|
781
|
-
follower_catchup_retry_wait_ms(options.follower_catchup_retry_wait_ms)
|
|
789
|
+
follower_catchup_retry_wait_ms(options.follower_catchup_retry_wait_ms),
|
|
790
|
+
metadata_write_temperature(options.metadata_write_temperature),
|
|
791
|
+
wal_write_temperature(options.wal_write_temperature) {
|
|
782
792
|
fs = env->GetFileSystem();
|
|
783
793
|
clock = env->GetSystemClock().get();
|
|
784
794
|
logger = info_log.get();
|
|
@@ -956,6 +966,10 @@ void ImmutableDBOptions::Dump(Logger* log) const {
|
|
|
956
966
|
db_host_id.c_str());
|
|
957
967
|
ROCKS_LOG_HEADER(log, " Options.enforce_single_del_contracts: %s",
|
|
958
968
|
enforce_single_del_contracts ? "true" : "false");
|
|
969
|
+
ROCKS_LOG_HEADER(log, " Options.metadata_write_temperature: %s",
|
|
970
|
+
temperature_to_string[metadata_write_temperature].c_str());
|
|
971
|
+
ROCKS_LOG_HEADER(log, " Options.wal_write_temperature: %s",
|
|
972
|
+
temperature_to_string[wal_write_temperature].c_str());
|
|
959
973
|
}
|
|
960
974
|
|
|
961
975
|
bool ImmutableDBOptions::IsWalDirSameAsDBPath() const {
|
|
@@ -103,6 +103,8 @@ struct ImmutableDBOptions {
|
|
|
103
103
|
uint64_t follower_refresh_catchup_period_ms;
|
|
104
104
|
uint64_t follower_catchup_retry_count;
|
|
105
105
|
uint64_t follower_catchup_retry_wait_ms;
|
|
106
|
+
Temperature metadata_write_temperature;
|
|
107
|
+
Temperature wal_write_temperature;
|
|
106
108
|
|
|
107
109
|
// Beginning convenience/helper objects that are not part of the base
|
|
108
110
|
// DBOptions
|
|
@@ -180,6 +180,15 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
|
|
|
180
180
|
options.enforce_single_del_contracts =
|
|
181
181
|
immutable_db_options.enforce_single_del_contracts;
|
|
182
182
|
options.daily_offpeak_time_utc = mutable_db_options.daily_offpeak_time_utc;
|
|
183
|
+
options.follower_refresh_catchup_period_ms =
|
|
184
|
+
immutable_db_options.follower_refresh_catchup_period_ms;
|
|
185
|
+
options.follower_catchup_retry_count =
|
|
186
|
+
immutable_db_options.follower_catchup_retry_count;
|
|
187
|
+
options.follower_catchup_retry_wait_ms =
|
|
188
|
+
immutable_db_options.follower_catchup_retry_wait_ms;
|
|
189
|
+
options.metadata_write_temperature =
|
|
190
|
+
immutable_db_options.metadata_write_temperature;
|
|
191
|
+
options.wal_write_temperature = immutable_db_options.wal_write_temperature;
|
|
183
192
|
return options;
|
|
184
193
|
}
|
|
185
194
|
|
|
@@ -213,6 +222,7 @@ void UpdateColumnFamilyOptions(const MutableCFOptions& moptions,
|
|
|
213
222
|
moptions.memtable_protection_bytes_per_key;
|
|
214
223
|
cf_opts->block_protection_bytes_per_key =
|
|
215
224
|
moptions.block_protection_bytes_per_key;
|
|
225
|
+
cf_opts->paranoid_memory_checks = moptions.paranoid_memory_checks;
|
|
216
226
|
cf_opts->bottommost_file_compaction_delay =
|
|
217
227
|
moptions.bottommost_file_compaction_delay;
|
|
218
228
|
|
|
@@ -69,8 +69,9 @@ Status PersistRocksDBOptions(const WriteOptions& write_options,
|
|
|
69
69
|
}
|
|
70
70
|
std::unique_ptr<FSWritableFile> wf;
|
|
71
71
|
|
|
72
|
-
|
|
73
|
-
|
|
72
|
+
FileOptions file_options;
|
|
73
|
+
file_options.temperature = db_opt.metadata_write_temperature;
|
|
74
|
+
Status s = fs->NewWritableFile(file_name, file_options, &wf, nullptr);
|
|
74
75
|
if (!s.ok()) {
|
|
75
76
|
return s;
|
|
76
77
|
}
|
|
@@ -188,6 +188,7 @@ TEST_F(OptionsSettableTest, BlockBasedTableOptionsAllFieldsSettable) {
|
|
|
188
188
|
"block_size_deviation=8;block_restart_interval=4; "
|
|
189
189
|
"metadata_block_size=1024;"
|
|
190
190
|
"partition_filters=false;"
|
|
191
|
+
"decouple_partitioned_filters=true;"
|
|
191
192
|
"optimize_filters_for_memory=true;"
|
|
192
193
|
"use_delta_encoding=true;"
|
|
193
194
|
"index_block_restart_interval=4;"
|
|
@@ -366,7 +367,12 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
|
|
|
366
367
|
"lowest_used_cache_tier=kNonVolatileBlockTier;"
|
|
367
368
|
"allow_data_in_errors=false;"
|
|
368
369
|
"enforce_single_del_contracts=false;"
|
|
369
|
-
"daily_offpeak_time_utc=08:30-19:00;"
|
|
370
|
+
"daily_offpeak_time_utc=08:30-19:00;"
|
|
371
|
+
"follower_refresh_catchup_period_ms=123;"
|
|
372
|
+
"follower_catchup_retry_count=456;"
|
|
373
|
+
"follower_catchup_retry_wait_ms=789;"
|
|
374
|
+
"metadata_write_temperature=kCold;"
|
|
375
|
+
"wal_write_temperature=kHot;",
|
|
370
376
|
new_options));
|
|
371
377
|
|
|
372
378
|
ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions),
|
|
@@ -567,7 +573,8 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
|
|
|
567
573
|
"block_protection_bytes_per_key=1;"
|
|
568
574
|
"memtable_max_range_deletions=999999;"
|
|
569
575
|
"bottommost_file_compaction_delay=7200;"
|
|
570
|
-
"uncache_aggressiveness=1234;"
|
|
576
|
+
"uncache_aggressiveness=1234;"
|
|
577
|
+
"paranoid_memory_checks=1;",
|
|
571
578
|
new_options));
|
|
572
579
|
|
|
573
580
|
ASSERT_NE(new_options->blob_cache.get(), nullptr);
|