@nxtedition/rocksdb 11.0.2 → 11.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. package/binding.cc +133 -122
  2. package/deps/rocksdb/rocksdb/db/column_family_test.cc +15 -7
  3. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
  4. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -4
  5. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +11 -7
  6. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +17 -11
  7. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +15 -0
  8. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +155 -0
  9. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +564 -461
  10. package/deps/rocksdb/rocksdb/db/db_follower_test.cc +8 -4
  11. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +40 -24
  12. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +8 -1
  13. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -4
  14. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  15. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -1
  16. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +19 -1
  17. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +20 -16
  18. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +27 -0
  19. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +10 -2
  20. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +85 -0
  21. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +55 -2
  22. package/deps/rocksdb/rocksdb/db/db_test2.cc +231 -0
  23. package/deps/rocksdb/rocksdb/db/db_test_util.cc +5 -0
  24. package/deps/rocksdb/rocksdb/db/db_test_util.h +10 -1
  25. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +0 -1
  26. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +175 -1
  27. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +64 -0
  28. package/deps/rocksdb/rocksdb/db/dbformat.h +5 -6
  29. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +8 -8
  30. package/deps/rocksdb/rocksdb/db/experimental.cc +3 -2
  31. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +2 -4
  32. package/deps/rocksdb/rocksdb/db/flush_job.cc +7 -2
  33. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +4 -2
  34. package/deps/rocksdb/rocksdb/db/listener_test.cc +5 -5
  35. package/deps/rocksdb/rocksdb/db/log_writer.cc +12 -3
  36. package/deps/rocksdb/rocksdb/db/memtable.cc +83 -23
  37. package/deps/rocksdb/rocksdb/db/memtable.h +11 -3
  38. package/deps/rocksdb/rocksdb/db/memtable_list.cc +7 -5
  39. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +21 -0
  40. package/deps/rocksdb/rocksdb/db/version_builder.cc +462 -33
  41. package/deps/rocksdb/rocksdb/db/version_builder.h +70 -23
  42. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +95 -207
  43. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +54 -35
  44. package/deps/rocksdb/rocksdb/db/version_set.cc +13 -11
  45. package/deps/rocksdb/rocksdb/db/version_set_test.cc +313 -59
  46. package/deps/rocksdb/rocksdb/db/write_batch.cc +124 -64
  47. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +2 -3
  48. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +1 -1
  49. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +4 -1
  50. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +9 -0
  51. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +4 -32
  52. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +7 -3
  53. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +60 -172
  54. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +57 -2
  55. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +23 -15
  56. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +2 -3
  57. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.cc +1 -1
  58. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +4 -1
  59. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +200 -92
  60. package/deps/rocksdb/rocksdb/env/file_system.cc +3 -3
  61. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +124 -23
  62. package/deps/rocksdb/rocksdb/file/delete_scheduler.h +61 -8
  63. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +141 -2
  64. package/deps/rocksdb/rocksdb/file/file_util.cc +17 -2
  65. package/deps/rocksdb/rocksdb/file/file_util.h +10 -0
  66. package/deps/rocksdb/rocksdb/file/filename.cc +11 -3
  67. package/deps/rocksdb/rocksdb/file/filename.h +2 -1
  68. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +18 -0
  69. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +27 -4
  70. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +8 -1
  71. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +8 -13
  72. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +4 -0
  73. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +5 -0
  74. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -2
  75. package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +2 -1
  76. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +34 -0
  77. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +25 -1
  78. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +5 -0
  79. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +27 -9
  80. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +2 -0
  81. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +12 -0
  82. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
  83. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  84. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +29 -1
  85. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +102 -33
  86. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +46 -3
  87. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +4 -0
  88. package/deps/rocksdb/rocksdb/options/cf_options.cc +6 -0
  89. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  90. package/deps/rocksdb/rocksdb/options/db_options.cc +15 -1
  91. package/deps/rocksdb/rocksdb/options/db_options.h +2 -0
  92. package/deps/rocksdb/rocksdb/options/options_helper.cc +10 -0
  93. package/deps/rocksdb/rocksdb/options/options_parser.cc +3 -2
  94. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -2
  95. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +75 -35
  96. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -0
  97. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +4 -0
  98. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +8 -1
  99. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +40 -15
  100. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +98 -17
  101. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +14 -2
  102. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +21 -91
  103. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +13 -21
  104. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +14 -5
  105. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +62 -53
  106. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +60 -38
  107. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +175 -78
  108. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +65 -36
  109. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +25 -15
  110. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +13 -1
  111. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +18 -4
  112. package/deps/rocksdb/rocksdb/table/meta_blocks.h +4 -0
  113. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +11 -0
  114. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +2 -2
  115. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +47 -18
  116. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +1 -2
  117. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +95 -0
  118. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +26 -15
  119. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +62 -19
  120. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +73 -34
  121. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +5 -0
  122. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +10 -3
  123. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +2 -1
  124. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +8 -5
  125. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +7 -4
  126. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +225 -0
  127. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +2 -1
  128. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +17 -0
  129. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +5 -2
  130. package/index.js +5 -17
  131. package/iterator.js +9 -1
  132. package/package.json +1 -1
  133. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  134. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
@@ -52,6 +52,7 @@
52
52
  #include "port/likely.h"
53
53
  #include "port/port.h"
54
54
  #include "rocksdb/slice.h"
55
+ #include "test_util/sync_point.h"
55
56
  #include "util/coding.h"
56
57
  #include "util/random.h"
57
58
 
@@ -169,13 +170,20 @@ class InlineSkipList {
169
170
  // REQUIRES: Valid()
170
171
  void Next();
171
172
 
173
+ [[nodiscard]] Status NextAndValidate(bool allow_data_in_errors);
174
+
172
175
  // Advances to the previous position.
173
176
  // REQUIRES: Valid()
174
177
  void Prev();
175
178
 
179
+ [[nodiscard]] Status PrevAndValidate(bool allow_data_in_errors);
180
+
176
181
  // Advance to the first entry with a key >= target
177
182
  void Seek(const char* target);
178
183
 
184
+ [[nodiscard]] Status SeekAndValidate(const char* target,
185
+ bool allow_data_in_errors);
186
+
179
187
  // Retreat to the last entry with a key <= target
180
188
  void SeekForPrev(const char* target);
181
189
 
@@ -237,21 +245,20 @@ class InlineSkipList {
237
245
  bool KeyIsAfterNode(const DecodedKey& key, Node* n) const;
238
246
 
239
247
  // Returns the earliest node with a key >= key.
240
- // Return nullptr if there is no such node.
241
- Node* FindGreaterOrEqual(const char* key) const;
242
-
243
- // Return the latest node with a key < key.
244
- // Return head_ if there is no such node.
248
+ // Returns nullptr if there is no such node.
249
+ // @param out_of_order_node If not null, will validate the order of visited
250
+ // nodes. If a pair of out-of-order nodes n1 and n2 are found, n1 will be
251
+ // returned and *out_of_order_node will be set to n2.
252
+ Node* FindGreaterOrEqual(const char* key, Node** out_of_order_node) const;
253
+
254
+ // Returns the latest node with a key < key.
255
+ // Returns head_ if there is no such node.
245
256
  // Fills prev[level] with pointer to previous node at "level" for every
246
257
  // level in [0..max_height_-1], if prev is non-null.
247
- Node* FindLessThan(const char* key, Node** prev = nullptr) const;
248
-
249
- // Return the latest node with a key < key on bottom_level. Start searching
250
- // from root node on the level below top_level.
251
- // Fills prev[level] with pointer to previous node at "level" for every
252
- // level in [bottom_level..top_level-1], if prev is non-null.
253
- Node* FindLessThan(const char* key, Node** prev, Node* root, int top_level,
254
- int bottom_level) const;
258
+ // @param out_of_order_node If not null, will validate the order of visited
259
+ // nodes. If a pair of out-of-order nodes n1 and n2 are found, n1 will be
260
+ // returned and *out_of_order_node will be set to n2.
261
+ Node* FindLessThan(const char* key, Node** out_of_order_node) const;
255
262
 
256
263
  // Return the last node in the list.
257
264
  // Return head_ if list is empty.
@@ -274,6 +281,8 @@ class InlineSkipList {
274
281
  // lowest_level (inclusive).
275
282
  void RecomputeSpliceLevels(const DecodedKey& key, Splice* splice,
276
283
  int recompute_level);
284
+
285
+ static Status Corruption(Node* prev, Node* next, bool allow_data_in_errors);
277
286
  };
278
287
 
279
288
  // Implementation details follow
@@ -392,20 +401,68 @@ inline void InlineSkipList<Comparator>::Iterator::Next() {
392
401
  node_ = node_->Next(0);
393
402
  }
394
403
 
404
+ template <class Comparator>
405
+ inline Status InlineSkipList<Comparator>::Iterator::NextAndValidate(
406
+ bool allow_data_in_errors) {
407
+ assert(Valid());
408
+ Node* prev_node = node_;
409
+ node_ = node_->Next(0);
410
+ // Verify that keys are increasing.
411
+ if (prev_node != list_->head_ && node_ != nullptr &&
412
+ list_->compare_(prev_node->Key(), node_->Key()) >= 0) {
413
+ Node* node = node_;
414
+ // invalidates the iterator
415
+ node_ = nullptr;
416
+ return Corruption(prev_node, node, allow_data_in_errors);
417
+ }
418
+ return Status::OK();
419
+ }
420
+
395
421
  template <class Comparator>
396
422
  inline void InlineSkipList<Comparator>::Iterator::Prev() {
397
423
  // Instead of using explicit "prev" links, we just search for the
398
424
  // last node that falls before key.
399
425
  assert(Valid());
400
- node_ = list_->FindLessThan(node_->Key());
426
+ node_ = list_->FindLessThan(node_->Key(), nullptr);
401
427
  if (node_ == list_->head_) {
402
428
  node_ = nullptr;
403
429
  }
404
430
  }
405
431
 
432
+ template <class Comparator>
433
+ inline Status InlineSkipList<Comparator>::Iterator::PrevAndValidate(
434
+ const bool allow_data_in_errors) {
435
+ assert(Valid());
436
+ // Skip list validation is done in FindLessThan().
437
+ Node* out_of_order_node = nullptr;
438
+ node_ = list_->FindLessThan(node_->Key(), &out_of_order_node);
439
+ if (out_of_order_node) {
440
+ Node* node = node_;
441
+ node_ = nullptr;
442
+ return Corruption(node, out_of_order_node, allow_data_in_errors);
443
+ }
444
+ if (node_ == list_->head_) {
445
+ node_ = nullptr;
446
+ }
447
+ return Status::OK();
448
+ }
449
+
406
450
  template <class Comparator>
407
451
  inline void InlineSkipList<Comparator>::Iterator::Seek(const char* target) {
408
- node_ = list_->FindGreaterOrEqual(target);
452
+ node_ = list_->FindGreaterOrEqual(target, nullptr);
453
+ }
454
+
455
+ template <class Comparator>
456
+ inline Status InlineSkipList<Comparator>::Iterator::SeekAndValidate(
457
+ const char* target, const bool allow_data_in_errors) {
458
+ Node* out_of_order_node = nullptr;
459
+ node_ = list_->FindGreaterOrEqual(target, &out_of_order_node);
460
+ if (out_of_order_node) {
461
+ Node* node = node_;
462
+ node_ = nullptr;
463
+ return Corruption(node, out_of_order_node, allow_data_in_errors);
464
+ }
465
+ return Status::OK();
409
466
  }
410
467
 
411
468
  template <class Comparator>
@@ -448,6 +505,7 @@ int InlineSkipList<Comparator>::RandomHeight() {
448
505
  rnd->Next() < kScaledInverseBranching_) {
449
506
  height++;
450
507
  }
508
+ TEST_SYNC_POINT_CALLBACK("InlineSkipList::RandomHeight::height", &height);
451
509
  assert(height > 0);
452
510
  assert(height <= kMaxHeight_);
453
511
  assert(height <= kMaxPossibleHeight);
@@ -472,7 +530,8 @@ bool InlineSkipList<Comparator>::KeyIsAfterNode(const DecodedKey& key,
472
530
 
473
531
  template <class Comparator>
474
532
  typename InlineSkipList<Comparator>::Node*
475
- InlineSkipList<Comparator>::FindGreaterOrEqual(const char* key) const {
533
+ InlineSkipList<Comparator>::FindGreaterOrEqual(
534
+ const char* key, Node** const out_of_order_node) const {
476
535
  // Note: It looks like we could reduce duplication by implementing
477
536
  // this function as FindLessThan(key)->Next(0), but we wouldn't be able
478
537
  // to exit early on equality and the result wouldn't even be correct.
@@ -486,6 +545,11 @@ InlineSkipList<Comparator>::FindGreaterOrEqual(const char* key) const {
486
545
  Node* next = x->Next(level);
487
546
  if (next != nullptr) {
488
547
  PREFETCH(next->Next(level), 0, 1);
548
+ if (out_of_order_node && x != head_ &&
549
+ compare_(x->Key(), next->Key()) >= 0) {
550
+ *out_of_order_node = next;
551
+ return x;
552
+ }
489
553
  }
490
554
  // Make sure the lists are sorted
491
555
  assert(x == head_ || next == nullptr || KeyIsAfterNode(next->Key(), x));
@@ -509,18 +573,11 @@ InlineSkipList<Comparator>::FindGreaterOrEqual(const char* key) const {
509
573
 
510
574
  template <class Comparator>
511
575
  typename InlineSkipList<Comparator>::Node*
512
- InlineSkipList<Comparator>::FindLessThan(const char* key, Node** prev) const {
513
- return FindLessThan(key, prev, head_, GetMaxHeight(), 0);
514
- }
515
-
516
- template <class Comparator>
517
- typename InlineSkipList<Comparator>::Node*
518
- InlineSkipList<Comparator>::FindLessThan(const char* key, Node** prev,
519
- Node* root, int top_level,
520
- int bottom_level) const {
521
- assert(top_level > bottom_level);
522
- int level = top_level - 1;
523
- Node* x = root;
576
+ InlineSkipList<Comparator>::FindLessThan(const char* key,
577
+ Node** const out_of_order_node) const {
578
+ int level = GetMaxHeight() - 1;
579
+ assert(level >= 0);
580
+ Node* x = head_;
524
581
  // KeyIsAfter(key, last_not_after) is definitely false
525
582
  Node* last_not_after = nullptr;
526
583
  const DecodedKey key_decoded = compare_.decode_key(key);
@@ -529,6 +586,11 @@ InlineSkipList<Comparator>::FindLessThan(const char* key, Node** prev,
529
586
  Node* next = x->Next(level);
530
587
  if (next != nullptr) {
531
588
  PREFETCH(next->Next(level), 0, 1);
589
+ if (out_of_order_node && x != head_ &&
590
+ compare_(x->Key(), next->Key()) >= 0) {
591
+ *out_of_order_node = next;
592
+ return x;
593
+ }
532
594
  }
533
595
  assert(x == head_ || next == nullptr || KeyIsAfterNode(next->Key(), x));
534
596
  assert(x == head_ || KeyIsAfterNode(key_decoded, x));
@@ -537,10 +599,7 @@ InlineSkipList<Comparator>::FindLessThan(const char* key, Node** prev,
537
599
  assert(next != nullptr);
538
600
  x = next;
539
601
  } else {
540
- if (prev != nullptr) {
541
- prev[level] = x;
542
- }
543
- if (level == bottom_level) {
602
+ if (level == 0) {
544
603
  return x;
545
604
  } else {
546
605
  // Switch to next list, reuse KeyIsAfterNode() result
@@ -999,7 +1058,7 @@ bool InlineSkipList<Comparator>::Insert(const char* key, Splice* splice,
999
1058
 
1000
1059
  template <class Comparator>
1001
1060
  bool InlineSkipList<Comparator>::Contains(const char* key) const {
1002
- Node* x = FindGreaterOrEqual(key);
1061
+ Node* x = FindGreaterOrEqual(key, nullptr);
1003
1062
  if (x != nullptr && Equal(key, x->Key())) {
1004
1063
  return true;
1005
1064
  } else {
@@ -1048,4 +1107,14 @@ void InlineSkipList<Comparator>::TEST_Validate() const {
1048
1107
  }
1049
1108
  }
1050
1109
 
1110
+ template <class Comparator>
1111
+ Status InlineSkipList<Comparator>::Corruption(Node* prev, Node* next,
1112
+ bool allow_data_in_errors) {
1113
+ std::string msg = "Out-of-order keys found in skiplist.";
1114
+ if (allow_data_in_errors) {
1115
+ msg.append(" prev key: " + Slice(prev->Key()).ToString(true));
1116
+ msg.append(" next key: " + Slice(next->Key()).ToString(true));
1117
+ }
1118
+ return Status::Corruption(msg);
1119
+ }
1051
1120
  } // namespace ROCKSDB_NAMESPACE
@@ -92,6 +92,20 @@ class SkipListRep : public MemTableRep {
92
92
  }
93
93
  }
94
94
 
95
+ Status GetAndValidate(const LookupKey& k, void* callback_args,
96
+ bool (*callback_func)(void* arg, const char* entry),
97
+ bool allow_data_in_errors) override {
98
+ SkipListRep::Iterator iter(&skip_list_);
99
+ Slice dummy_slice;
100
+ Status status = iter.SeekAndValidate(dummy_slice, k.memtable_key().data(),
101
+ allow_data_in_errors);
102
+ for (; iter.Valid() && status.ok() &&
103
+ callback_func(callback_args, iter.key());
104
+ status = iter.NextAndValidate(allow_data_in_errors)) {
105
+ }
106
+ return status;
107
+ }
108
+
95
109
  uint64_t ApproximateNumEntries(const Slice& start_ikey,
96
110
  const Slice& end_ikey) override {
97
111
  std::string tmp;
@@ -181,15 +195,24 @@ class SkipListRep : public MemTableRep {
181
195
 
182
196
  // Returns the key at the current position.
183
197
  // REQUIRES: Valid()
184
- const char* key() const override { return iter_.key(); }
198
+ const char* key() const override {
199
+ assert(Valid());
200
+ return iter_.key();
201
+ }
185
202
 
186
203
  // Advances to the next position.
187
204
  // REQUIRES: Valid()
188
- void Next() override { iter_.Next(); }
205
+ void Next() override {
206
+ assert(Valid());
207
+ iter_.Next();
208
+ }
189
209
 
190
210
  // Advances to the previous position.
191
211
  // REQUIRES: Valid()
192
- void Prev() override { iter_.Prev(); }
212
+ void Prev() override {
213
+ assert(Valid());
214
+ iter_.Prev();
215
+ }
193
216
 
194
217
  // Advance to the first entry with a key >= target
195
218
  void Seek(const Slice& user_key, const char* memtable_key) override {
@@ -219,6 +242,26 @@ class SkipListRep : public MemTableRep {
219
242
  // Final state of iterator is Valid() iff list is not empty.
220
243
  void SeekToLast() override { iter_.SeekToLast(); }
221
244
 
245
+ Status NextAndValidate(bool allow_data_in_errors) override {
246
+ assert(Valid());
247
+ return iter_.NextAndValidate(allow_data_in_errors);
248
+ }
249
+
250
+ Status SeekAndValidate(const Slice& user_key, const char* memtable_key,
251
+ bool allow_data_in_errors) override {
252
+ if (memtable_key != nullptr) {
253
+ return iter_.SeekAndValidate(memtable_key, allow_data_in_errors);
254
+ } else {
255
+ return iter_.SeekAndValidate(EncodeKey(&tmp_, user_key),
256
+ allow_data_in_errors);
257
+ }
258
+ }
259
+
260
+ Status PrevAndValidate(bool allow_data_in_error) override {
261
+ assert(Valid());
262
+ return iter_.PrevAndValidate(allow_data_in_error);
263
+ }
264
+
222
265
  protected:
223
266
  std::string tmp_; // For passing to EncodeKey
224
267
  };
@@ -266,6 +266,10 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
266
266
  {PREFETCH_BYTES_USEFUL, "rocksdb.prefetch.bytes.useful"},
267
267
  {PREFETCH_HITS, "rocksdb.prefetch.hits"},
268
268
  {SST_FOOTER_CORRUPTION_COUNT, "rocksdb.footer.corruption.count"},
269
+ {FILE_READ_CORRUPTION_RETRY_COUNT,
270
+ "rocksdb.file.read.corruption.retry.count"},
271
+ {FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT,
272
+ "rocksdb.file.read.corruption.retry.success.count"},
269
273
  };
270
274
 
271
275
  const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
@@ -531,6 +531,10 @@ static std::unordered_map<std::string, OptionTypeInfo>
531
531
  {offsetof(struct MutableCFOptions, block_protection_bytes_per_key),
532
532
  OptionType::kUInt8T, OptionVerificationType::kNormal,
533
533
  OptionTypeFlags::kMutable}},
534
+ {"paranoid_memory_checks",
535
+ {offsetof(struct MutableCFOptions, paranoid_memory_checks),
536
+ OptionType::kBoolean, OptionVerificationType::kNormal,
537
+ OptionTypeFlags::kMutable}},
534
538
  {kOptNameCompOpts,
535
539
  OptionTypeInfo::Struct(
536
540
  kOptNameCompOpts, &compression_options_type_info,
@@ -1104,6 +1108,8 @@ void MutableCFOptions::Dump(Logger* log) const {
1104
1108
  ttl);
1105
1109
  ROCKS_LOG_INFO(log, " periodic_compaction_seconds: %" PRIu64,
1106
1110
  periodic_compaction_seconds);
1111
+ ROCKS_LOG_INFO(log, " paranoid_memory_checks: %d",
1112
+ paranoid_memory_checks);
1107
1113
  std::string result;
1108
1114
  char buf[10];
1109
1115
  for (const auto m : max_bytes_for_level_multiplier_additional) {
@@ -168,6 +168,7 @@ struct MutableCFOptions {
168
168
  memtable_protection_bytes_per_key(
169
169
  options.memtable_protection_bytes_per_key),
170
170
  block_protection_bytes_per_key(options.block_protection_bytes_per_key),
171
+ paranoid_memory_checks(options.paranoid_memory_checks),
171
172
  sample_for_compression(
172
173
  options.sample_for_compression), // TODO: is 0 fine here?
173
174
  compression_per_level(options.compression_per_level),
@@ -317,6 +318,7 @@ struct MutableCFOptions {
317
318
  Temperature default_write_temperature;
318
319
  uint32_t memtable_protection_bytes_per_key;
319
320
  uint8_t block_protection_bytes_per_key;
321
+ bool paranoid_memory_checks;
320
322
 
321
323
  uint64_t sample_for_compression;
322
324
  std::vector<CompressionType> compression_per_level;
@@ -576,6 +576,14 @@ static std::unordered_map<std::string, OptionTypeInfo>
576
576
  {offsetof(struct ImmutableDBOptions, follower_catchup_retry_wait_ms),
577
577
  OptionType::kUInt64T, OptionVerificationType::kNormal,
578
578
  OptionTypeFlags::kNone}},
579
+ {"metadata_write_temperature",
580
+ {offsetof(struct ImmutableDBOptions, metadata_write_temperature),
581
+ OptionType::kTemperature, OptionVerificationType::kNormal,
582
+ OptionTypeFlags::kNone}},
583
+ {"wal_write_temperature",
584
+ {offsetof(struct ImmutableDBOptions, wal_write_temperature),
585
+ OptionType::kTemperature, OptionVerificationType::kNormal,
586
+ OptionTypeFlags::kNone}},
579
587
  };
580
588
 
581
589
  const std::string OptionsHelper::kDBOptionsName = "DBOptions";
@@ -778,7 +786,9 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options)
778
786
  follower_refresh_catchup_period_ms(
779
787
  options.follower_refresh_catchup_period_ms),
780
788
  follower_catchup_retry_count(options.follower_catchup_retry_count),
781
- follower_catchup_retry_wait_ms(options.follower_catchup_retry_wait_ms) {
789
+ follower_catchup_retry_wait_ms(options.follower_catchup_retry_wait_ms),
790
+ metadata_write_temperature(options.metadata_write_temperature),
791
+ wal_write_temperature(options.wal_write_temperature) {
782
792
  fs = env->GetFileSystem();
783
793
  clock = env->GetSystemClock().get();
784
794
  logger = info_log.get();
@@ -956,6 +966,10 @@ void ImmutableDBOptions::Dump(Logger* log) const {
956
966
  db_host_id.c_str());
957
967
  ROCKS_LOG_HEADER(log, " Options.enforce_single_del_contracts: %s",
958
968
  enforce_single_del_contracts ? "true" : "false");
969
+ ROCKS_LOG_HEADER(log, " Options.metadata_write_temperature: %s",
970
+ temperature_to_string[metadata_write_temperature].c_str());
971
+ ROCKS_LOG_HEADER(log, " Options.wal_write_temperature: %s",
972
+ temperature_to_string[wal_write_temperature].c_str());
959
973
  }
960
974
 
961
975
  bool ImmutableDBOptions::IsWalDirSameAsDBPath() const {
@@ -103,6 +103,8 @@ struct ImmutableDBOptions {
103
103
  uint64_t follower_refresh_catchup_period_ms;
104
104
  uint64_t follower_catchup_retry_count;
105
105
  uint64_t follower_catchup_retry_wait_ms;
106
+ Temperature metadata_write_temperature;
107
+ Temperature wal_write_temperature;
106
108
 
107
109
  // Beginning convenience/helper objects that are not part of the base
108
110
  // DBOptions
@@ -180,6 +180,15 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
180
180
  options.enforce_single_del_contracts =
181
181
  immutable_db_options.enforce_single_del_contracts;
182
182
  options.daily_offpeak_time_utc = mutable_db_options.daily_offpeak_time_utc;
183
+ options.follower_refresh_catchup_period_ms =
184
+ immutable_db_options.follower_refresh_catchup_period_ms;
185
+ options.follower_catchup_retry_count =
186
+ immutable_db_options.follower_catchup_retry_count;
187
+ options.follower_catchup_retry_wait_ms =
188
+ immutable_db_options.follower_catchup_retry_wait_ms;
189
+ options.metadata_write_temperature =
190
+ immutable_db_options.metadata_write_temperature;
191
+ options.wal_write_temperature = immutable_db_options.wal_write_temperature;
183
192
  return options;
184
193
  }
185
194
 
@@ -213,6 +222,7 @@ void UpdateColumnFamilyOptions(const MutableCFOptions& moptions,
213
222
  moptions.memtable_protection_bytes_per_key;
214
223
  cf_opts->block_protection_bytes_per_key =
215
224
  moptions.block_protection_bytes_per_key;
225
+ cf_opts->paranoid_memory_checks = moptions.paranoid_memory_checks;
216
226
  cf_opts->bottommost_file_compaction_delay =
217
227
  moptions.bottommost_file_compaction_delay;
218
228
 
@@ -69,8 +69,9 @@ Status PersistRocksDBOptions(const WriteOptions& write_options,
69
69
  }
70
70
  std::unique_ptr<FSWritableFile> wf;
71
71
 
72
- Status s =
73
- fs->NewWritableFile(file_name, FileOptions(), &wf, nullptr);
72
+ FileOptions file_options;
73
+ file_options.temperature = db_opt.metadata_write_temperature;
74
+ Status s = fs->NewWritableFile(file_name, file_options, &wf, nullptr);
74
75
  if (!s.ok()) {
75
76
  return s;
76
77
  }
@@ -188,6 +188,7 @@ TEST_F(OptionsSettableTest, BlockBasedTableOptionsAllFieldsSettable) {
188
188
  "block_size_deviation=8;block_restart_interval=4; "
189
189
  "metadata_block_size=1024;"
190
190
  "partition_filters=false;"
191
+ "decouple_partitioned_filters=true;"
191
192
  "optimize_filters_for_memory=true;"
192
193
  "use_delta_encoding=true;"
193
194
  "index_block_restart_interval=4;"
@@ -366,7 +367,12 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
366
367
  "lowest_used_cache_tier=kNonVolatileBlockTier;"
367
368
  "allow_data_in_errors=false;"
368
369
  "enforce_single_del_contracts=false;"
369
- "daily_offpeak_time_utc=08:30-19:00;",
370
+ "daily_offpeak_time_utc=08:30-19:00;"
371
+ "follower_refresh_catchup_period_ms=123;"
372
+ "follower_catchup_retry_count=456;"
373
+ "follower_catchup_retry_wait_ms=789;"
374
+ "metadata_write_temperature=kCold;"
375
+ "wal_write_temperature=kHot;",
370
376
  new_options));
371
377
 
372
378
  ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions),
@@ -567,7 +573,8 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
567
573
  "block_protection_bytes_per_key=1;"
568
574
  "memtable_max_range_deletions=999999;"
569
575
  "bottommost_file_compaction_delay=7200;"
570
- "uncache_aggressiveness=1234;",
576
+ "uncache_aggressiveness=1234;"
577
+ "paranoid_memory_checks=1;",
571
578
  new_options));
572
579
 
573
580
  ASSERT_NE(new_options->blob_cache.get(), nullptr);