@nxtedition/rocksdb 7.0.0 → 7.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. package/binding.cc +38 -40
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +1 -1
  3. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +3 -1
  4. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +2 -0
  5. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +1 -0
  6. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +28 -0
  7. package/deps/rocksdb/rocksdb/cache/cache_test.cc +5 -2
  8. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +48 -60
  9. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +18 -20
  10. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +2 -2
  11. package/deps/rocksdb/rocksdb/db/c.cc +5 -0
  12. package/deps/rocksdb/rocksdb/db/column_family.cc +20 -0
  13. package/deps/rocksdb/rocksdb/db/column_family.h +9 -0
  14. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +44 -26
  15. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +32 -14
  16. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +73 -44
  17. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +3 -1
  18. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +6 -1
  19. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +10 -5
  20. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +47 -35
  21. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +2 -1
  22. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +54 -32
  23. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +426 -61
  24. package/deps/rocksdb/rocksdb/db/db_options_test.cc +1 -0
  25. package/deps/rocksdb/rocksdb/db/db_test.cc +102 -24
  26. package/deps/rocksdb/rocksdb/db/db_test2.cc +159 -30
  27. package/deps/rocksdb/rocksdb/db/db_test_util.cc +1 -0
  28. package/deps/rocksdb/rocksdb/db/dbformat.h +1 -1
  29. package/deps/rocksdb/rocksdb/db/version_builder.cc +39 -10
  30. package/deps/rocksdb/rocksdb/db/version_builder.h +4 -1
  31. package/deps/rocksdb/rocksdb/db/version_edit.h +20 -0
  32. package/deps/rocksdb/rocksdb/db/version_set.cc +2 -1
  33. package/deps/rocksdb/rocksdb/db/version_set.h +17 -2
  34. package/deps/rocksdb/rocksdb/db/version_set_test.cc +119 -0
  35. package/deps/rocksdb/rocksdb/db/write_batch.cc +96 -0
  36. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +4 -0
  37. package/deps/rocksdb/rocksdb/db/write_thread.cc +1 -0
  38. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +3 -0
  39. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +9 -0
  40. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +18 -2
  41. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +4 -0
  42. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +12 -0
  43. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +1 -1
  44. package/deps/rocksdb/rocksdb/env/fs_posix.cc +96 -6
  45. package/deps/rocksdb/rocksdb/env/io_posix.cc +51 -18
  46. package/deps/rocksdb/rocksdb/env/io_posix.h +2 -0
  47. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +12 -5
  48. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +22 -6
  49. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +99 -8
  50. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +9 -1
  51. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +3 -0
  52. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +3 -0
  53. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +4 -0
  54. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
  55. package/deps/rocksdb/rocksdb/include/rocksdb/io_status.h +7 -0
  56. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +11 -1
  57. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +4 -1
  58. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +14 -1
  59. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +6 -0
  60. package/deps/rocksdb/rocksdb/options/cf_options.cc +12 -1
  61. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  62. package/deps/rocksdb/rocksdb/options/options.cc +8 -1
  63. package/deps/rocksdb/rocksdb/options/options_helper.cc +1 -0
  64. package/deps/rocksdb/rocksdb/options/options_parser.cc +2 -1
  65. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +7 -2
  66. package/deps/rocksdb/rocksdb/options/options_test.cc +52 -0
  67. package/deps/rocksdb/rocksdb/port/port_posix.h +10 -1
  68. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +1 -1
  69. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +1 -1
  70. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +1 -1
  71. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +5 -5
  72. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +16 -10
  73. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +1 -1
  74. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +1 -1
  75. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -4
  76. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +1 -1
  77. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +39 -12
  78. package/deps/rocksdb/rocksdb/util/comparator.cc +10 -0
  79. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +1 -1
  80. package/deps/rocksdb/rocksdb/util/xxhash.h +2 -1
  81. package/index.js +2 -2
  82. package/package.json +1 -1
  83. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  84. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -14,6 +14,7 @@
14
14
  #include <unordered_map>
15
15
  #include <vector>
16
16
 
17
+ #include "cache/cache_reservation_manager.h"
17
18
  #include "db/memtable_list.h"
18
19
  #include "db/table_cache.h"
19
20
  #include "db/table_properties_collector.h"
@@ -520,6 +521,10 @@ class ColumnFamilyData {
520
521
 
521
522
  ThreadLocalPtr* TEST_GetLocalSV() { return local_sv_.get(); }
522
523
  WriteBufferManager* write_buffer_mgr() { return write_buffer_manager_; }
524
+ std::shared_ptr<CacheReservationManager>
525
+ GetFileMetadataCacheReservationManager() {
526
+ return file_metadata_cache_res_mgr_;
527
+ }
523
528
 
524
529
  static const uint32_t kDummyColumnFamilyDataId;
525
530
 
@@ -618,6 +623,10 @@ class ColumnFamilyData {
618
623
  bool db_paths_registered_;
619
624
 
620
625
  std::string full_history_ts_low_;
626
+
627
+ // For charging memory usage of file metadata created for newly added files to
628
+ // a Version associated with this CFD
629
+ std::shared_ptr<CacheReservationManager> file_metadata_cache_res_mgr_;
621
630
  };
622
631
 
623
632
  // ColumnFamilySet has interesting thread-safety requirements
@@ -157,10 +157,12 @@ void CompactionIterator::Next() {
157
157
  Status s = ParseInternalKey(key_, &ikey_, allow_data_in_errors_);
158
158
  // MergeUntil stops when it encounters a corrupt key and does not
159
159
  // include them in the result, so we expect the keys here to be valid.
160
- assert(s.ok());
161
160
  if (!s.ok()) {
162
- ROCKS_LOG_FATAL(info_log_, "Invalid key in compaction. %s",
163
- s.getState());
161
+ ROCKS_LOG_FATAL(
162
+ info_log_, "Invalid ikey %s in compaction. %s",
163
+ allow_data_in_errors_ ? key_.ToString(true).c_str() : "hidden",
164
+ s.getState());
165
+ assert(false);
164
166
  }
165
167
 
166
168
  // Keep current_key_ in sync.
@@ -517,18 +519,18 @@ void CompactionIterator::NextFromInput() {
517
519
  // In the previous iteration we encountered a single delete that we could
518
520
  // not compact out. We will keep this Put, but can drop its data.
519
521
  // (See Optimization 3, below.)
520
- assert(ikey_.type == kTypeValue || ikey_.type == kTypeBlobIndex);
521
522
  if (ikey_.type != kTypeValue && ikey_.type != kTypeBlobIndex) {
522
- ROCKS_LOG_FATAL(info_log_,
523
- "Unexpected key type %d for compaction output",
524
- ikey_.type);
523
+ ROCKS_LOG_FATAL(info_log_, "Unexpected key %s for compaction output",
524
+ ikey_.DebugString(allow_data_in_errors_, true).c_str());
525
+ assert(false);
525
526
  }
526
- assert(current_user_key_snapshot_ >= last_snapshot);
527
527
  if (current_user_key_snapshot_ < last_snapshot) {
528
528
  ROCKS_LOG_FATAL(info_log_,
529
- "current_user_key_snapshot_ (%" PRIu64
529
+ "key %s, current_user_key_snapshot_ (%" PRIu64
530
530
  ") < last_snapshot (%" PRIu64 ")",
531
+ ikey_.DebugString(allow_data_in_errors_, true).c_str(),
531
532
  current_user_key_snapshot_, last_snapshot);
533
+ assert(false);
532
534
  }
533
535
 
534
536
  if (ikey_.type == kTypeBlobIndex) {
@@ -767,12 +769,13 @@ void CompactionIterator::NextFromInput() {
767
769
  // Note: Dropping this key will not affect TransactionDB write-conflict
768
770
  // checking since there has already been a record returned for this key
769
771
  // in this snapshot.
770
- assert(last_sequence >= current_user_key_sequence_);
771
772
  if (last_sequence < current_user_key_sequence_) {
772
773
  ROCKS_LOG_FATAL(info_log_,
773
- "last_sequence (%" PRIu64
774
+ "key %s, last_sequence (%" PRIu64
774
775
  ") < current_user_key_sequence_ (%" PRIu64 ")",
776
+ ikey_.DebugString(allow_data_in_errors_, true).c_str(),
775
777
  last_sequence, current_user_key_sequence_);
778
+ assert(false);
776
779
  }
777
780
 
778
781
  ++iter_stats_.num_record_drop_hidden; // rule (A)
@@ -884,10 +887,12 @@ void CompactionIterator::NextFromInput() {
884
887
  pik_status = ParseInternalKey(key_, &ikey_, allow_data_in_errors_);
885
888
  // MergeUntil stops when it encounters a corrupt key and does not
886
889
  // include them in the result, so we expect the keys here to be valid.
887
- assert(pik_status.ok());
888
890
  if (!pik_status.ok()) {
889
- ROCKS_LOG_FATAL(info_log_, "Invalid key in compaction. %s",
890
- pik_status.getState());
891
+ ROCKS_LOG_FATAL(
892
+ info_log_, "Invalid key %s in compaction. %s",
893
+ allow_data_in_errors_ ? key_.ToString(true).c_str() : "hidden",
894
+ pik_status.getState());
895
+ assert(false);
891
896
  }
892
897
  // Keep current_key_ in sync.
893
898
  current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type);
@@ -1090,15 +1095,22 @@ void CompactionIterator::PrepareOutput() {
1090
1095
  !compaction_->allow_ingest_behind() && bottommost_level_ &&
1091
1096
  DefinitelyInSnapshot(ikey_.sequence, earliest_snapshot_) &&
1092
1097
  ikey_.type != kTypeMerge && current_key_committed_) {
1093
- assert(ikey_.type != kTypeDeletion);
1094
- assert(ikey_.type != kTypeSingleDeletion ||
1095
- (timestamp_size_ || full_history_ts_low_));
1096
1098
  if (ikey_.type == kTypeDeletion ||
1097
- (ikey_.type == kTypeSingleDeletion &&
1098
- (!timestamp_size_ || !full_history_ts_low_))) {
1099
- ROCKS_LOG_FATAL(info_log_,
1100
- "Unexpected key type %d for seq-zero optimization",
1101
- ikey_.type);
1099
+ (ikey_.type == kTypeSingleDeletion && timestamp_size_ == 0)) {
1100
+ ROCKS_LOG_FATAL(
1101
+ info_log_,
1102
+ "Unexpected key %s for seq-zero optimization. "
1103
+ "earliest_snapshot %" PRIu64
1104
+ ", earliest_write_conflict_snapshot %" PRIu64
1105
+ " job_snapshot %" PRIu64
1106
+ ". timestamp_size: %d full_history_ts_low_ %s",
1107
+ ikey_.DebugString(allow_data_in_errors_, true).c_str(),
1108
+ earliest_snapshot_, earliest_write_conflict_snapshot_,
1109
+ job_snapshot_, static_cast<int>(timestamp_size_),
1110
+ full_history_ts_low_ != nullptr
1111
+ ? Slice(*full_history_ts_low_).ToString(true).c_str()
1112
+ : "null");
1113
+ assert(false);
1102
1114
  }
1103
1115
  ikey_.sequence = 0;
1104
1116
  last_key_seq_zeroed_ = true;
@@ -1129,14 +1141,17 @@ inline SequenceNumber CompactionIterator::findEarliestVisibleSnapshot(
1129
1141
  }
1130
1142
  auto snapshots_iter = std::lower_bound(
1131
1143
  snapshots_->begin(), snapshots_->end(), in);
1144
+ assert(prev_snapshot != nullptr);
1132
1145
  if (snapshots_iter == snapshots_->begin()) {
1133
1146
  *prev_snapshot = 0;
1134
1147
  } else {
1135
1148
  *prev_snapshot = *std::prev(snapshots_iter);
1136
- assert(*prev_snapshot < in);
1137
1149
  if (*prev_snapshot >= in) {
1138
1150
  ROCKS_LOG_FATAL(info_log_,
1139
- "*prev_snapshot >= in in findEarliestVisibleSnapshot");
1151
+ "*prev_snapshot (%" PRIu64 ") >= in (%" PRIu64
1152
+ ") in findEarliestVisibleSnapshot",
1153
+ *prev_snapshot, in);
1154
+ assert(false);
1140
1155
  }
1141
1156
  }
1142
1157
  if (snapshot_checker_ == nullptr) {
@@ -1146,9 +1161,12 @@ inline SequenceNumber CompactionIterator::findEarliestVisibleSnapshot(
1146
1161
  bool has_released_snapshot = !released_snapshots_.empty();
1147
1162
  for (; snapshots_iter != snapshots_->end(); ++snapshots_iter) {
1148
1163
  auto cur = *snapshots_iter;
1149
- assert(in <= cur);
1150
1164
  if (in > cur) {
1151
- ROCKS_LOG_FATAL(info_log_, "in > cur in findEarliestVisibleSnapshot");
1165
+ ROCKS_LOG_FATAL(info_log_,
1166
+ "in (%" PRIu64 ") > cur (%" PRIu64
1167
+ ") in findEarliestVisibleSnapshot",
1168
+ in, cur);
1169
+ assert(false);
1152
1170
  }
1153
1171
  // Skip if cur is in released_snapshots.
1154
1172
  if (has_released_snapshot && released_snapshots_.count(cur) > 0) {
@@ -449,67 +449,85 @@ TEST_P(ComparatorDBTest, TwoStrComparator) {
449
449
  }
450
450
  }
451
451
 
452
+ namespace {
453
+ void VerifyNotSuccessor(const Slice& s, const Slice& t) {
454
+ auto bc = BytewiseComparator();
455
+ auto rbc = ReverseBytewiseComparator();
456
+ ASSERT_FALSE(bc->IsSameLengthImmediateSuccessor(s, t));
457
+ ASSERT_FALSE(rbc->IsSameLengthImmediateSuccessor(s, t));
458
+ ASSERT_FALSE(bc->IsSameLengthImmediateSuccessor(t, s));
459
+ ASSERT_FALSE(rbc->IsSameLengthImmediateSuccessor(t, s));
460
+ }
461
+
462
+ void VerifySuccessor(const Slice& s, const Slice& t) {
463
+ auto bc = BytewiseComparator();
464
+ auto rbc = ReverseBytewiseComparator();
465
+ ASSERT_TRUE(bc->IsSameLengthImmediateSuccessor(s, t));
466
+ ASSERT_FALSE(rbc->IsSameLengthImmediateSuccessor(s, t));
467
+ ASSERT_FALSE(bc->IsSameLengthImmediateSuccessor(t, s));
468
+ // Should be true but that increases exposure to a design bug in
469
+ // auto_prefix_mode, so currently set to FALSE
470
+ ASSERT_FALSE(rbc->IsSameLengthImmediateSuccessor(t, s));
471
+ }
472
+
473
+ } // namespace
474
+
452
475
  TEST_P(ComparatorDBTest, IsSameLengthImmediateSuccessor) {
453
476
  {
454
477
  // different length
455
478
  Slice s("abcxy");
456
479
  Slice t("abcxyz");
457
- ASSERT_FALSE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
480
+ VerifyNotSuccessor(s, t);
458
481
  }
459
482
  {
460
483
  Slice s("abcxyz");
461
484
  Slice t("abcxy");
462
- ASSERT_FALSE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
485
+ VerifyNotSuccessor(s, t);
463
486
  }
464
487
  {
465
488
  // not last byte different
466
489
  Slice s("abc1xyz");
467
490
  Slice t("abc2xyz");
468
- ASSERT_FALSE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
491
+ VerifyNotSuccessor(s, t);
469
492
  }
470
493
  {
471
494
  // same string
472
495
  Slice s("abcxyz");
473
496
  Slice t("abcxyz");
474
- ASSERT_FALSE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
497
+ VerifyNotSuccessor(s, t);
475
498
  }
476
499
  {
477
500
  Slice s("abcxy");
478
501
  Slice t("abcxz");
479
- ASSERT_TRUE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
480
- }
481
- {
482
- Slice s("abcxz");
483
- Slice t("abcxy");
484
- ASSERT_FALSE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
502
+ VerifySuccessor(s, t);
485
503
  }
486
504
  {
487
505
  const char s_array[] = "\x50\x8a\xac";
488
506
  const char t_array[] = "\x50\x8a\xad";
489
507
  Slice s(s_array);
490
508
  Slice t(t_array);
491
- ASSERT_TRUE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
509
+ VerifySuccessor(s, t);
492
510
  }
493
511
  {
494
512
  const char s_array[] = "\x50\x8a\xff";
495
513
  const char t_array[] = "\x50\x8b\x00";
496
514
  Slice s(s_array, 3);
497
515
  Slice t(t_array, 3);
498
- ASSERT_TRUE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
516
+ VerifySuccessor(s, t);
499
517
  }
500
518
  {
501
519
  const char s_array[] = "\x50\x8a\xff\xff";
502
520
  const char t_array[] = "\x50\x8b\x00\x00";
503
521
  Slice s(s_array, 4);
504
522
  Slice t(t_array, 4);
505
- ASSERT_TRUE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
523
+ VerifySuccessor(s, t);
506
524
  }
507
525
  {
508
526
  const char s_array[] = "\x50\x8a\xff\xff";
509
527
  const char t_array[] = "\x50\x8b\x00\x01";
510
528
  Slice s(s_array, 4);
511
529
  Slice t(t_array, 4);
512
- ASSERT_FALSE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
530
+ VerifyNotSuccessor(s, t);
513
531
  }
514
532
  }
515
533
 
@@ -579,55 +579,84 @@ TEST_F(DBBasicTest, ManifestRollOver) {
579
579
  } while (ChangeCompactOptions());
580
580
  }
581
581
 
582
- TEST_F(DBBasicTest, IdentityAcrossRestarts1) {
582
+ TEST_F(DBBasicTest, IdentityAcrossRestarts) {
583
+ constexpr size_t kMinIdSize = 10;
583
584
  do {
584
- std::string id1;
585
- ASSERT_OK(db_->GetDbIdentity(id1));
586
-
587
- Options options = CurrentOptions();
588
- Reopen(options);
589
- std::string id2;
590
- ASSERT_OK(db_->GetDbIdentity(id2));
591
- // id1 should match id2 because identity was not regenerated
592
- ASSERT_EQ(id1.compare(id2), 0);
593
-
594
- std::string idfilename = IdentityFileName(dbname_);
595
- ASSERT_OK(env_->DeleteFile(idfilename));
596
- Reopen(options);
597
- std::string id3;
598
- ASSERT_OK(db_->GetDbIdentity(id3));
599
- if (options.write_dbid_to_manifest) {
600
- ASSERT_EQ(id1.compare(id3), 0);
601
- } else {
602
- // id1 should NOT match id3 because identity was regenerated
603
- ASSERT_NE(id1.compare(id3), 0);
585
+ for (bool with_manifest : {false, true}) {
586
+ std::string idfilename = IdentityFileName(dbname_);
587
+ std::string id1, tmp;
588
+ ASSERT_OK(db_->GetDbIdentity(id1));
589
+ ASSERT_GE(id1.size(), kMinIdSize);
590
+
591
+ Options options = CurrentOptions();
592
+ options.write_dbid_to_manifest = with_manifest;
593
+ Reopen(options);
594
+ std::string id2;
595
+ ASSERT_OK(db_->GetDbIdentity(id2));
596
+ // id2 should match id1 because identity was not regenerated
597
+ ASSERT_EQ(id1, id2);
598
+ ASSERT_OK(ReadFileToString(env_, idfilename, &tmp));
599
+ ASSERT_EQ(tmp, id2);
600
+
601
+ // Recover from deleted/missing IDENTITY
602
+ ASSERT_OK(env_->DeleteFile(idfilename));
603
+ Reopen(options);
604
+ std::string id3;
605
+ ASSERT_OK(db_->GetDbIdentity(id3));
606
+ if (with_manifest) {
607
+ // id3 should match id1 because identity was restored from manifest
608
+ ASSERT_EQ(id1, id3);
609
+ } else {
610
+ // id3 should NOT match id1 because identity was regenerated
611
+ ASSERT_NE(id1, id3);
612
+ ASSERT_GE(id3.size(), kMinIdSize);
613
+ }
614
+ ASSERT_OK(ReadFileToString(env_, idfilename, &tmp));
615
+ ASSERT_EQ(tmp, id3);
616
+
617
+ // Recover from truncated IDENTITY
618
+ {
619
+ std::unique_ptr<WritableFile> w;
620
+ ASSERT_OK(env_->NewWritableFile(idfilename, &w, EnvOptions()));
621
+ ASSERT_OK(w->Close());
622
+ }
623
+ Reopen(options);
624
+ std::string id4;
625
+ ASSERT_OK(db_->GetDbIdentity(id4));
626
+ if (with_manifest) {
627
+ // id4 should match id1 because identity was restored from manifest
628
+ ASSERT_EQ(id1, id4);
629
+ } else {
630
+ // id4 should NOT match id1 because identity was regenerated
631
+ ASSERT_NE(id1, id4);
632
+ ASSERT_GE(id4.size(), kMinIdSize);
633
+ }
634
+ ASSERT_OK(ReadFileToString(env_, idfilename, &tmp));
635
+ ASSERT_EQ(tmp, id4);
636
+
637
+ // Recover from overwritten IDENTITY
638
+ std::string silly_id = "asdf123456789";
639
+ {
640
+ std::unique_ptr<WritableFile> w;
641
+ ASSERT_OK(env_->NewWritableFile(idfilename, &w, EnvOptions()));
642
+ ASSERT_OK(w->Append(silly_id));
643
+ ASSERT_OK(w->Close());
644
+ }
645
+ Reopen(options);
646
+ std::string id5;
647
+ ASSERT_OK(db_->GetDbIdentity(id5));
648
+ if (with_manifest) {
649
+ // id5 should match id1 because identity was restored from manifest
650
+ ASSERT_EQ(id1, id5);
651
+ } else {
652
+ ASSERT_EQ(id5, silly_id);
653
+ }
654
+ ASSERT_OK(ReadFileToString(env_, idfilename, &tmp));
655
+ ASSERT_EQ(tmp, id5);
604
656
  }
605
657
  } while (ChangeCompactOptions());
606
658
  }
607
659
 
608
- TEST_F(DBBasicTest, IdentityAcrossRestarts2) {
609
- do {
610
- std::string id1;
611
- ASSERT_OK(db_->GetDbIdentity(id1));
612
-
613
- Options options = CurrentOptions();
614
- options.write_dbid_to_manifest = true;
615
- Reopen(options);
616
- std::string id2;
617
- ASSERT_OK(db_->GetDbIdentity(id2));
618
- // id1 should match id2 because identity was not regenerated
619
- ASSERT_EQ(id1.compare(id2), 0);
620
-
621
- std::string idfilename = IdentityFileName(dbname_);
622
- ASSERT_OK(env_->DeleteFile(idfilename));
623
- Reopen(options);
624
- std::string id3;
625
- ASSERT_OK(db_->GetDbIdentity(id3));
626
- // id1 should NOT match id3 because identity was regenerated
627
- ASSERT_EQ(id1, id3);
628
- } while (ChangeCompactOptions());
629
- }
630
-
631
660
  #ifndef ROCKSDB_LITE
632
661
  TEST_F(DBBasicTest, Snapshot) {
633
662
  env_->SetMockSleep();
@@ -936,7 +936,9 @@ TEST_F(DBBlockCacheTest, AddRedundantStats) {
936
936
  for (std::shared_ptr<Cache> base_cache :
937
937
  {NewLRUCache(capacity, num_shard_bits),
938
938
  NewClockCache(capacity, num_shard_bits),
939
- NewFastLRUCache(capacity, num_shard_bits)}) {
939
+ NewFastLRUCache(capacity, 1 /*estimated_value_size*/, num_shard_bits,
940
+ false /*strict_capacity_limit*/,
941
+ kDefaultCacheMetadataChargePolicy)}) {
940
942
  if (!base_cache) {
941
943
  // Skip clock cache when not supported
942
944
  continue;
@@ -812,8 +812,9 @@ Status DBImpl::StartPeriodicWorkScheduler() {
812
812
  return periodic_work_scheduler_->Register(
813
813
  this, mutable_db_options_.stats_dump_period_sec,
814
814
  mutable_db_options_.stats_persist_period_sec);
815
- #endif // !ROCKSDB_LITE
815
+ #else
816
816
  return Status::OK();
817
+ #endif // !ROCKSDB_LITE
817
818
  }
818
819
 
819
820
  // estimate the total size of stats_history_
@@ -1742,6 +1743,8 @@ Status DBImpl::Get(const ReadOptions& read_options,
1742
1743
  Status DBImpl::Get(const ReadOptions& read_options,
1743
1744
  ColumnFamilyHandle* column_family, const Slice& key,
1744
1745
  PinnableSlice* value, std::string* timestamp) {
1746
+ assert(value != nullptr);
1747
+ value->Reset();
1745
1748
  GetImplOptions get_impl_options;
1746
1749
  get_impl_options.column_family = column_family;
1747
1750
  get_impl_options.value = value;
@@ -2349,6 +2352,7 @@ void DBImpl::MultiGet(const ReadOptions& read_options, const size_t num_keys,
2349
2352
  autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE> sorted_keys;
2350
2353
  sorted_keys.resize(num_keys);
2351
2354
  for (size_t i = 0; i < num_keys; ++i) {
2355
+ values[i].Reset();
2352
2356
  key_context.emplace_back(column_families[i], keys[i], &values[i],
2353
2357
  timestamps ? &timestamps[i] : nullptr,
2354
2358
  &statuses[i]);
@@ -2495,6 +2499,7 @@ void DBImpl::MultiGet(const ReadOptions& read_options,
2495
2499
  autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE> sorted_keys;
2496
2500
  sorted_keys.resize(num_keys);
2497
2501
  for (size_t i = 0; i < num_keys; ++i) {
2502
+ values[i].Reset();
2498
2503
  key_context.emplace_back(column_family, keys[i], &values[i],
2499
2504
  timestamps ? &timestamps[i] : nullptr,
2500
2505
  &statuses[i]);
@@ -1479,8 +1479,10 @@ class DBImpl : public DB {
1479
1479
 
1480
1480
  virtual bool OwnTablesAndLogs() const { return true; }
1481
1481
 
1482
- // Set DB identity file, and write DB ID to manifest if necessary.
1483
- Status SetDBId(bool read_only, RecoveryContext* recovery_ctx);
1482
+ // Setup DB identity file, and write DB ID to manifest if necessary.
1483
+ Status SetupDBId(bool read_only, RecoveryContext* recovery_ctx);
1484
+ // Assign db_id_ and write DB ID to manifest if necessary.
1485
+ void SetDBId(std::string&& id, bool read_only, RecoveryContext* recovery_ctx);
1484
1486
 
1485
1487
  // REQUIRES: db mutex held when calling this function, but the db mutex can
1486
1488
  // be released and re-acquired. Db mutex will be held when the function
@@ -1915,9 +1917,12 @@ class DBImpl : public DB {
1915
1917
  Status PreprocessWrite(const WriteOptions& write_options, bool* need_log_sync,
1916
1918
  WriteContext* write_context);
1917
1919
 
1918
- WriteBatch* MergeBatch(const WriteThread::WriteGroup& write_group,
1919
- WriteBatch* tmp_batch, size_t* write_with_wal,
1920
- WriteBatch** to_be_cached_state);
1920
+ // Merge write batches in the write group into merged_batch.
1921
+ // Returns OK if merge is successful.
1922
+ // Returns Corruption if corruption in write batch is detected.
1923
+ Status MergeBatch(const WriteThread::WriteGroup& write_group,
1924
+ WriteBatch* tmp_batch, WriteBatch** merged_batch,
1925
+ size_t* write_with_wal, WriteBatch** to_be_cached_state);
1921
1926
 
1922
1927
  // rate_limiter_priority is used to charge `DBOptions::rate_limiter`
1923
1928
  // for automatic WAL flush (`Options::manual_wal_flush` == false)
@@ -873,43 +873,55 @@ uint64_t PrecomputeMinLogNumberToKeep2PC(
873
873
  return min_log_number_to_keep;
874
874
  }
875
875
 
876
- Status DBImpl::SetDBId(bool read_only, RecoveryContext* recovery_ctx) {
876
+ void DBImpl::SetDBId(std::string&& id, bool read_only,
877
+ RecoveryContext* recovery_ctx) {
878
+ assert(db_id_.empty());
879
+ assert(!id.empty());
880
+ db_id_ = std::move(id);
881
+ if (!read_only && immutable_db_options_.write_dbid_to_manifest) {
882
+ assert(recovery_ctx != nullptr);
883
+ assert(versions_->GetColumnFamilySet() != nullptr);
884
+ VersionEdit edit;
885
+ edit.SetDBId(db_id_);
886
+ versions_->db_id_ = db_id_;
887
+ recovery_ctx->UpdateVersionEdits(
888
+ versions_->GetColumnFamilySet()->GetDefault(), edit);
889
+ }
890
+ }
891
+
892
+ Status DBImpl::SetupDBId(bool read_only, RecoveryContext* recovery_ctx) {
877
893
  Status s;
878
- // Happens when immutable_db_options_.write_dbid_to_manifest is set to true
879
- // the very first time.
880
- if (db_id_.empty()) {
881
- // Check for the IDENTITY file and create it if not there.
882
- s = fs_->FileExists(IdentityFileName(dbname_), IOOptions(), nullptr);
883
- // Typically Identity file is created in NewDB() and for some reason if
884
- // it is no longer available then at this point DB ID is not in Identity
885
- // file or Manifest.
886
- if (s.IsNotFound()) {
887
- // Create a new DB ID, saving to file only if allowed
888
- if (read_only) {
889
- db_id_ = env_->GenerateUniqueId();
890
- return Status::OK();
891
- } else {
892
- s = SetIdentityFile(env_, dbname_);
893
- if (!s.ok()) {
894
- return s;
895
- }
894
+ // Check for the IDENTITY file and create it if not there or
895
+ // broken or not matching manifest
896
+ std::string db_id_in_file;
897
+ s = fs_->FileExists(IdentityFileName(dbname_), IOOptions(), nullptr);
898
+ if (s.ok()) {
899
+ s = GetDbIdentityFromIdentityFile(&db_id_in_file);
900
+ if (s.ok() && !db_id_in_file.empty()) {
901
+ if (db_id_.empty()) {
902
+ // Loaded from file and wasn't already known from manifest
903
+ SetDBId(std::move(db_id_in_file), read_only, recovery_ctx);
904
+ return s;
905
+ } else if (db_id_ == db_id_in_file) {
906
+ // Loaded from file and matches manifest
907
+ return s;
896
908
  }
897
- } else if (!s.ok()) {
898
- assert(s.IsIOError());
899
- return s;
900
- }
901
- s = GetDbIdentityFromIdentityFile(&db_id_);
902
- if (immutable_db_options_.write_dbid_to_manifest && s.ok()) {
903
- assert(!read_only);
904
- assert(recovery_ctx != nullptr);
905
- assert(versions_->GetColumnFamilySet() != nullptr);
906
- VersionEdit edit;
907
- edit.SetDBId(db_id_);
908
- versions_->db_id_ = db_id_;
909
- recovery_ctx->UpdateVersionEdits(
910
- versions_->GetColumnFamilySet()->GetDefault(), edit);
911
- }
912
- } else if (!read_only) {
909
+ }
910
+ }
911
+ if (s.IsNotFound()) {
912
+ s = Status::OK();
913
+ }
914
+ if (!s.ok()) {
915
+ assert(s.IsIOError());
916
+ return s;
917
+ }
918
+ // Otherwise IDENTITY file is missing or no good.
919
+ // Generate new id if needed
920
+ if (db_id_.empty()) {
921
+ SetDBId(env_->GenerateUniqueId(), read_only, recovery_ctx);
922
+ }
923
+ // Persist it to IDENTITY file if allowed
924
+ if (!read_only) {
913
925
  s = SetIdentityFile(env_, dbname_, db_id_);
914
926
  }
915
927
  return s;
@@ -524,7 +524,8 @@ Status DBImpl::Recover(
524
524
  return s;
525
525
  }
526
526
  }
527
- s = SetDBId(read_only, recovery_ctx);
527
+ s = SetupDBId(read_only, recovery_ctx);
528
+ ROCKS_LOG_INFO(immutable_db_options_.info_log, "DB ID: %s\n", db_id_.c_str());
528
529
  if (s.ok() && !read_only) {
529
530
  s = DeleteUnreferencedSstFiles(recovery_ctx);
530
531
  }