@nxtedition/rocksdb 13.5.9 → 13.5.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/deps/rocksdb/rocksdb/BUCK +2 -1
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +2 -1
  3. package/deps/rocksdb/rocksdb/Makefile +1 -1
  4. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +4 -5
  5. package/deps/rocksdb/rocksdb/db/c.cc +13 -0
  6. package/deps/rocksdb/rocksdb/db/c_test.c +0 -12
  7. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +8 -8
  8. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +2 -3
  9. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +5 -4
  10. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -1
  11. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +10 -10
  12. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +11 -6
  13. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +10 -16
  14. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +2 -4
  15. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -17
  16. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +164 -0
  17. package/deps/rocksdb/rocksdb/db/corruption_test.cc +74 -3
  18. package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +39 -4
  19. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +2 -83
  20. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +0 -4
  21. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +11 -11
  22. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +0 -3
  23. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +0 -9
  24. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +16 -54
  25. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +0 -6
  26. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +186 -0
  27. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +3 -40
  28. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +0 -54
  29. package/deps/rocksdb/rocksdb/db/db_test.cc +0 -292
  30. package/deps/rocksdb/rocksdb/db/db_test2.cc +0 -1235
  31. package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -0
  32. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +11 -4
  33. package/deps/rocksdb/rocksdb/db/log_reader.cc +11 -11
  34. package/deps/rocksdb/rocksdb/db/merge_helper.h +1 -1
  35. package/deps/rocksdb/rocksdb/db/multi_scan.cc +70 -0
  36. package/deps/rocksdb/rocksdb/db/version_set.cc +15 -8
  37. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +4 -0
  38. package/deps/rocksdb/rocksdb/env/composite_env.cc +4 -0
  39. package/deps/rocksdb/rocksdb/env/env.cc +4 -0
  40. package/deps/rocksdb/rocksdb/env/env_encryption.cc +38 -3
  41. package/deps/rocksdb/rocksdb/env/env_test.cc +36 -1
  42. package/deps/rocksdb/rocksdb/env/fs_posix.cc +20 -4
  43. package/deps/rocksdb/rocksdb/env/io_posix.cc +16 -0
  44. package/deps/rocksdb/rocksdb/env/io_posix.h +3 -0
  45. package/deps/rocksdb/rocksdb/env/mock_env.cc +5 -0
  46. package/deps/rocksdb/rocksdb/file/readahead_raf.cc +4 -0
  47. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +33 -6
  48. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +5 -0
  49. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +25 -1
  50. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +10 -0
  51. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -0
  52. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +12 -0
  53. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +12 -8
  54. package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +29 -28
  55. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +26 -6
  56. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +9 -0
  57. package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +3 -0
  58. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +142 -0
  59. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +2 -0
  60. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +2 -2
  61. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +2 -0
  62. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  63. package/deps/rocksdb/rocksdb/options/options_helper.h +3 -0
  64. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +2 -0
  65. package/deps/rocksdb/rocksdb/port/win/io_win.cc +20 -0
  66. package/deps/rocksdb/rocksdb/port/win/io_win.h +4 -0
  67. package/deps/rocksdb/rocksdb/src.mk +2 -1
  68. package/deps/rocksdb/rocksdb/table/block_based/block.cc +31 -34
  69. package/deps/rocksdb/rocksdb/table/block_based/block.h +2 -4
  70. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +43 -7
  71. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -0
  72. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +367 -2
  73. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +69 -23
  74. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +54 -6
  75. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +27 -5
  76. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +167 -3
  77. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +6 -2
  78. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +6 -0
  79. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +12 -0
  80. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +1 -0
  81. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +0 -3
  82. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +10 -7
  83. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +244 -0
  84. package/deps/rocksdb/rocksdb/table/external_table.cc +1 -1
  85. package/deps/rocksdb/rocksdb/table/format.cc +51 -33
  86. package/deps/rocksdb/rocksdb/table/format.h +1 -1
  87. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +13 -8
  88. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +1 -3
  89. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +5 -1
  90. package/deps/rocksdb/rocksdb/table/table_test.cc +629 -1
  91. package/deps/rocksdb/rocksdb/test_util/testutil.cc +0 -1
  92. package/deps/rocksdb/rocksdb/test_util/testutil.h +5 -0
  93. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +183 -94
  94. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +71 -0
  95. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +37 -22
  96. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +308 -0
  97. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +189 -0
  98. package/deps/rocksdb/rocksdb/util/cast_util.h +22 -11
  99. package/deps/rocksdb/rocksdb/util/coding.h +4 -3
  100. package/deps/rocksdb/rocksdb/util/compression.cc +2 -0
  101. package/deps/rocksdb/rocksdb/util/compression.h +16 -6
  102. package/deps/rocksdb/rocksdb/util/compression_test.cc +1679 -15
  103. package/deps/rocksdb/rocksdb/util/stop_watch.h +17 -7
  104. package/deps/rocksdb/rocksdb/util/timer_queue_test.cc +17 -3
  105. package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +10 -0
  106. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +5 -0
  107. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +2 -0
  108. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +18 -2
  109. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +22 -3
  110. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +5 -0
  111. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +22 -2
  112. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +15 -4
  113. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +61 -0
  114. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +18 -0
  115. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +3 -0
  116. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +3 -0
  117. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +9 -3
  118. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +9 -0
  119. package/deps/rocksdb/rocksdb.gyp +15 -1
  120. package/package.json +1 -1
  121. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  122. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  123. package/deps/rocksdb/rocksdb/util/auto_skip_compressor.cc +0 -131
  124. package/deps/rocksdb/rocksdb/util/auto_skip_compressor.h +0 -90
@@ -556,6 +556,74 @@ TEST_F(CorruptionTest, TableFileFooterNotMagic) {
556
556
  ASSERT_TRUE(s.ToString().find(".sst") != std::string::npos);
557
557
  }
558
558
 
559
+ TEST_F(CorruptionTest, DBOpenWithWrongFileSize) {
560
+ // Validate that when paranoid flag is true, DB::Open() fails if one of the
561
+ // files is corrupted. Validate that when paranoid flag is false, DB::Open()
562
+ // succeeds if one of the files is corrupted, and the healthy file is readable.
563
+ CloseDb();
564
+
565
+ const std::string test_cf_name = "test_cf";
566
+ std::vector<ColumnFamilyDescriptor> cf_descs;
567
+ cf_descs.emplace_back(kDefaultColumnFamilyName, ColumnFamilyOptions());
568
+ cf_descs.emplace_back(test_cf_name, ColumnFamilyOptions());
569
+
570
+ {
571
+ options_.create_missing_column_families = true;
572
+ std::vector<ColumnFamilyHandle*> cfhs;
573
+ ASSERT_OK(DB::Open(options_, dbname_, cf_descs, &cfhs, &db_));
574
+ assert(db_ != nullptr); // suppress false clang-analyze report
575
+
576
+ ASSERT_OK(db_->Put(WriteOptions(), cfhs[0], "k", "v"));
577
+ ASSERT_OK(db_->Put(WriteOptions(), cfhs[1], "k1", "v1"));
578
+ ASSERT_OK(db_->Put(WriteOptions(), cfhs[0], "k2", "v2"));
579
+ for (auto* cfh : cfhs) {
580
+ delete cfh;
581
+ }
582
+ DBImpl* dbi = static_cast_with_check<DBImpl>(db_);
583
+ ASSERT_OK(dbi->TEST_FlushMemTable());
584
+
585
+ // ********************************************
586
+ // Corrupt the file by making the file bigger
587
+ std::vector<LiveFileMetaData> metadata;
588
+ db_->GetLiveFilesMetaData(&metadata);
589
+ std::string filename = dbname_ + metadata[0].name;
590
+ const auto& fs = options_.env->GetFileSystem();
591
+ {
592
+ std::unique_ptr<FSWritableFile> f;
593
+ ASSERT_OK(fs->ReopenWritableFile(filename, FileOptions(), &f, nullptr));
594
+ ASSERT_OK(f->Append("blahblah", IOOptions(), nullptr));
595
+ ASSERT_OK(f->Close(IOOptions(), nullptr));
596
+ }
597
+ CloseDb();
598
+ }
599
+
600
+ // DB fails to open because one of the files is corrupted, as paranoid flag is
601
+ // true
602
+ options_.paranoid_checks = true;
603
+ std::vector<ColumnFamilyHandle*> cfhs;
604
+ auto s = DB::Open(options_, dbname_, cf_descs, &cfhs, &db_);
605
+ ASSERT_TRUE(s.IsCorruption());
606
+ ASSERT_TRUE(s.ToString().find("file size mismatch") != std::string::npos);
607
+
608
+ // DB opens successfully, as paranoid flag is false; validate that the file
609
+ // that is healthy is still accessible
610
+ options_.paranoid_checks = false;
611
+ ASSERT_OK(DB::Open(options_, dbname_, cf_descs, &cfhs, &db_));
612
+ assert(db_ != nullptr); // suppress false clang-analyze report
613
+
614
+ std::string v;
615
+ ASSERT_OK(db_->Get(ReadOptions(), cfhs[1], "k1", &v));
616
+ ASSERT_EQ(v, "v1");
617
+
618
+ // Validate the default column family is corrupted
619
+ Check(0, 0);
620
+ s = db_->Get(ReadOptions(), cfhs[0], "k1", &v);
621
+ ASSERT_TRUE(s.IsCorruption());
622
+
623
+ delete cfhs[1];
624
+ delete cfhs[0];
625
+ }
626
+
559
627
  TEST_F(CorruptionTest, TableFileWrongSize) {
560
628
  Build(100);
561
629
  DBImpl* dbi = static_cast_with_check<DBImpl>(db_);
@@ -579,13 +647,16 @@ TEST_F(CorruptionTest, TableFileWrongSize) {
579
647
  // DB actually accepts this without paranoid checks, relying on size
580
648
  // recorded in manifest to locate the SST footer.
581
649
  options_.paranoid_checks = false;
582
- options_.skip_checking_sst_file_sizes_on_db_open = false;
583
650
  Reopen();
584
- Check(100, 100);
651
+ // As footer could not be extracted, file is completely unreadable
652
+ Check(0, 0);
653
+ std::string v;
654
+ auto s = db_->Get(ReadOptions(), "k1", &v);
655
+ ASSERT_TRUE(s.IsCorruption());
585
656
 
586
657
  // But reports the issue with paranoid checks
587
658
  options_.paranoid_checks = true;
588
- Status s = TryReopen();
659
+ s = TryReopen();
589
660
  ASSERT_TRUE(s.IsCorruption());
590
661
  ASSERT_TRUE(s.ToString().find("file size mismatch") != std::string::npos);
591
662
 
@@ -17,9 +17,10 @@ class DBEncryptionTest : public DBTestBase {
17
17
  public:
18
18
  DBEncryptionTest()
19
19
  : DBTestBase("db_encryption_test", /*env_do_fsync=*/true) {}
20
- Env* GetTargetEnv() {
20
+ Env* GetNonEncryptedEnv() {
21
21
  if (encrypted_env_ != nullptr) {
22
- return (static_cast<EnvWrapper*>(encrypted_env_))->target();
22
+ return (static_cast_with_check<CompositeEnvWrapper>(encrypted_env_))
23
+ ->env_target();
23
24
  } else {
24
25
  return env_;
25
26
  }
@@ -38,7 +39,7 @@ TEST_F(DBEncryptionTest, CheckEncrypted) {
38
39
  auto status = env_->GetChildren(dbname_, &fileNames);
39
40
  ASSERT_OK(status);
40
41
 
41
- Env* target = GetTargetEnv();
42
+ Env* target = GetNonEncryptedEnv();
42
43
  int hits = 0;
43
44
  for (auto it = fileNames.begin(); it != fileNames.end(); ++it) {
44
45
  if (*it == "LOCK") {
@@ -89,7 +90,7 @@ TEST_F(DBEncryptionTest, CheckEncrypted) {
89
90
  }
90
91
 
91
92
  TEST_F(DBEncryptionTest, ReadEmptyFile) {
92
- auto defaultEnv = GetTargetEnv();
93
+ auto defaultEnv = GetNonEncryptedEnv();
93
94
 
94
95
  // create empty file for reading it back in later
95
96
  auto envOptions = EnvOptions(CurrentOptions());
@@ -116,6 +117,40 @@ TEST_F(DBEncryptionTest, ReadEmptyFile) {
116
117
  ASSERT_TRUE(data.empty());
117
118
  }
118
119
 
120
+ TEST_F(DBEncryptionTest, NotSupportedGetFileSize) {
121
+ // Validate encrypted env does not support GetFileSize.
122
+ // The goal of the test is to validate the encrypted env/fs does not support
123
+ // GetFileSize API on FSRandomAccessFile interface.
124
+ // This test combined with the rest of the integration tests validate that
125
+ // the new API GetFileSize on FSRandomAccessFile interface is not required to
126
+ // be supported for database to work properly.
127
+ // The GetFileSize API is used in ReadFooterFromFile() API to get the file
128
+ // size. When GetFileSize API is not supported, the ReadFooterFromFile() API
129
+ // will use FileSystem GetFileSize API as fallback. Refer to the
130
+ // EncryptedRandomAccessFile class definition for more details.
131
+ if (!encrypted_env_) {
132
+ return;
133
+ }
134
+
135
+ auto fs = encrypted_env_->GetFileSystem();
136
+
137
+ // create empty file for reading it back in later
138
+ auto filePath = dbname_ + "/empty.empty";
139
+
140
+ // Create empty file
141
+ CreateFile(fs.get(), filePath, "", false);
142
+
143
+ // Open it for reading footer
144
+ std::unique_ptr<FSRandomAccessFile> randomAccessFile;
145
+ auto status = fs->NewRandomAccessFile(filePath, FileOptions(),
146
+ &randomAccessFile, nullptr);
147
+ ASSERT_OK(status);
148
+
149
+ uint64_t fileSize;
150
+ status = randomAccessFile->GetFileSize(&fileSize);
151
+ ASSERT_TRUE(status.IsNotSupported());
152
+ }
153
+
119
154
  } // namespace ROCKSDB_NAMESPACE
120
155
 
121
156
  int main(int argc, char** argv) {
@@ -3836,10 +3836,8 @@ bool DBImpl::KeyMayExist(const ReadOptions& read_options,
3836
3836
  std::unique_ptr<MultiScan> DBImpl::NewMultiScan(
3837
3837
  const ReadOptions& _read_options, ColumnFamilyHandle* column_family,
3838
3838
  const std::vector<ScanOptions>& scan_opts) {
3839
- std::unique_ptr<Iterator> iter(NewIterator(_read_options, column_family));
3840
- iter->Prepare(scan_opts);
3841
- std::unique_ptr<MultiScan> ms_iter =
3842
- std::make_unique<MultiScan>(scan_opts, std::move(iter));
3839
+ std::unique_ptr<MultiScan> ms_iter = std::make_unique<MultiScan>(
3840
+ _read_options, scan_opts, this, column_family);
3843
3841
  return ms_iter;
3844
3842
  }
3845
3843
 
@@ -5057,85 +5055,6 @@ void DBImpl::GetAllColumnFamilyMetaData(
5057
5055
  }
5058
5056
  }
5059
5057
 
5060
- Status DBImpl::CheckConsistency() {
5061
- mutex_.AssertHeld();
5062
- std::vector<LiveFileMetaData> metadata;
5063
- versions_->GetLiveFilesMetaData(&metadata);
5064
- TEST_SYNC_POINT("DBImpl::CheckConsistency:AfterGetLiveFilesMetaData");
5065
-
5066
- std::string corruption_messages;
5067
-
5068
- if (immutable_db_options_.skip_checking_sst_file_sizes_on_db_open) {
5069
- // Instead of calling GetFileSize() for each expected file, call
5070
- // GetChildren() for the DB directory and check that all expected files
5071
- // are listed, without checking their sizes.
5072
- // Since sst files might be in different directories, do it for each
5073
- // directory separately.
5074
- std::map<std::string, std::vector<std::string>> files_by_directory;
5075
- for (const auto& md : metadata) {
5076
- // md.name has a leading "/". Remove it.
5077
- std::string fname = md.name;
5078
- if (!fname.empty() && fname[0] == '/') {
5079
- fname = fname.substr(1);
5080
- }
5081
- files_by_directory[md.db_path].push_back(fname);
5082
- }
5083
-
5084
- IOOptions io_opts;
5085
- io_opts.do_not_recurse = true;
5086
- for (const auto& dir_files : files_by_directory) {
5087
- std::string directory = dir_files.first;
5088
- std::vector<std::string> existing_files;
5089
- Status s = fs_->GetChildren(directory, io_opts, &existing_files,
5090
- /*IODebugContext*=*/nullptr);
5091
- if (!s.ok()) {
5092
- corruption_messages +=
5093
- "Can't list files in " + directory + ": " + s.ToString() + "\n";
5094
- continue;
5095
- }
5096
- std::sort(existing_files.begin(), existing_files.end());
5097
-
5098
- for (const std::string& fname : dir_files.second) {
5099
- if (!std::binary_search(existing_files.begin(), existing_files.end(),
5100
- fname) &&
5101
- !std::binary_search(existing_files.begin(), existing_files.end(),
5102
- Rocks2LevelTableFileName(fname))) {
5103
- corruption_messages +=
5104
- "Missing sst file " + fname + " in " + directory + "\n";
5105
- }
5106
- }
5107
- }
5108
- } else {
5109
- for (const auto& md : metadata) {
5110
- // md.name has a leading "/".
5111
- std::string file_path = md.db_path + md.name;
5112
-
5113
- uint64_t fsize = 0;
5114
- TEST_SYNC_POINT("DBImpl::CheckConsistency:BeforeGetFileSize");
5115
- Status s = env_->GetFileSize(file_path, &fsize);
5116
- if (!s.ok() &&
5117
- env_->GetFileSize(Rocks2LevelTableFileName(file_path), &fsize).ok()) {
5118
- s = Status::OK();
5119
- }
5120
- if (!s.ok()) {
5121
- corruption_messages +=
5122
- "Can't access " + md.name + ": " + s.ToString() + "\n";
5123
- } else if (fsize != md.size) {
5124
- corruption_messages += "Sst file size mismatch: " + file_path +
5125
- ". Size recorded in manifest " +
5126
- std::to_string(md.size) + ", actual size " +
5127
- std::to_string(fsize) + "\n";
5128
- }
5129
- }
5130
- }
5131
-
5132
- if (corruption_messages.size() == 0) {
5133
- return Status::OK();
5134
- } else {
5135
- return Status::Corruption(corruption_messages);
5136
- }
5137
- }
5138
-
5139
5058
  Status DBImpl::GetDbIdentity(std::string& identity) const {
5140
5059
  identity.assign(db_id_);
5141
5060
  return Status::OK();
@@ -804,10 +804,6 @@ class DBImpl : public DB {
804
804
  // being detected.
805
805
  const Snapshot* GetSnapshotForWriteConflictBoundary();
806
806
 
807
- // checks if all live files exist on file system and that their file sizes
808
- // match to our in-memory records
809
- virtual Status CheckConsistency();
810
-
811
807
  // max_file_num_to_ignore allows bottom level compaction to filter out newly
812
808
  // compacted SST files. Setting max_file_num_to_ignore to kMaxUint64 will
813
809
  // disable the filtering
@@ -1849,10 +1849,9 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
1849
1849
  0 /* max_subcompactions, not applicable */,
1850
1850
  {} /* grandparents, not applicable */,
1851
1851
  std::nullopt /* earliest_snapshot */, nullptr /* snapshot_checker */,
1852
- false /* is manual */, "" /* trim_ts */, -1 /* score, not applicable */,
1853
- false /* is deletion compaction, not applicable */,
1854
- false /* l0_files_might_overlap, not applicable */,
1855
- CompactionReason::kRefitLevel));
1852
+ CompactionReason::kRefitLevel, "" /* trim_ts */,
1853
+ -1 /* score, not applicable */,
1854
+ false /* l0_files_might_overlap, not applicable */));
1856
1855
  cfd->compaction_picker()->RegisterCompaction(c.get());
1857
1856
  TEST_SYNC_POINT("DBImpl::ReFitLevel:PostRegisterCompaction");
1858
1857
  VersionEdit edit;
@@ -4424,13 +4423,14 @@ Compaction* DBImpl::CreateIntendedCompactionForwardedToBottomPriorityPool(
4424
4423
 
4425
4424
  c->ReleaseCompactionFiles(Status::OK());
4426
4425
 
4427
- Compaction* intended_compaction = new Compaction(
4428
- vstorage, io, mo, mutable_db_options_, std::move(inputs),
4429
- c->output_level(), c->target_output_file_size(),
4430
- c->max_compaction_bytes(), c->output_path_id(), c->output_compression(),
4431
- c->output_compression_opts(), c->output_temperature(),
4432
- c->max_subcompactions(), c->grandparents(),
4433
- std::nullopt /* earliest_snapshot */, nullptr /* snapshot_checker */);
4426
+ Compaction* intended_compaction =
4427
+ new Compaction(vstorage, io, mo, mutable_db_options_, std::move(inputs),
4428
+ c->output_level(), c->target_output_file_size(),
4429
+ c->max_compaction_bytes(), c->output_path_id(),
4430
+ c->output_compression(), c->output_compression_opts(),
4431
+ c->output_temperature(), c->max_subcompactions(),
4432
+ c->grandparents(), std::nullopt /* earliest_snapshot */,
4433
+ nullptr /* snapshot_checker */, c->compaction_reason());
4434
4434
 
4435
4435
  cfd->compaction_picker()->RegisterCompaction(intended_compaction);
4436
4436
  vstorage->ComputeCompactionScore(io, mo);
@@ -70,9 +70,6 @@ Status DBImplFollower::Recover(
70
70
  }
71
71
  return s;
72
72
  }
73
- if (immutable_db_options_.paranoid_checks && s.ok()) {
74
- s = CheckConsistency();
75
- }
76
73
  if (s.ok()) {
77
74
  default_cf_handle_ = new ColumnFamilyHandleImpl(
78
75
  versions_->GetColumnFamilySet()->GetDefault(), this, &mutex_);
@@ -191,12 +191,6 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src,
191
191
  "wal_compression is disabled since only zstd is supported");
192
192
  }
193
193
 
194
- if (!result.paranoid_checks) {
195
- result.skip_checking_sst_file_sizes_on_db_open = true;
196
- ROCKS_LOG_INFO(result.info_log,
197
- "file size check will be skipped during open.");
198
- }
199
-
200
194
  return result;
201
195
  }
202
196
 
@@ -694,9 +688,6 @@ Status DBImpl::Recover(
694
688
  s = MaybeUpdateNextFileNumber(recovery_ctx);
695
689
  }
696
690
 
697
- if (immutable_db_options_.paranoid_checks && s.ok()) {
698
- s = CheckConsistency();
699
- }
700
691
  if (s.ok() && !read_only) {
701
692
  // TODO: share file descriptors (FSDirectory) with SetDirectories above
702
693
  std::map<std::string, std::shared_ptr<FSDirectory>> created_dirs;
@@ -49,9 +49,6 @@ Status DBImplSecondary::Recover(
49
49
  }
50
50
  return s;
51
51
  }
52
- if (immutable_db_options_.paranoid_checks && s.ok()) {
53
- s = CheckConsistency();
54
- }
55
52
  // Initial max_total_in_memory_state_ before recovery logs.
56
53
  max_total_in_memory_state_ = 0;
57
54
  for (auto cfd : *versions_->GetColumnFamilySet()) {
@@ -653,49 +650,6 @@ Status DBImplSecondary::NewIterators(
653
650
  return Status::OK();
654
651
  }
655
652
 
656
- Status DBImplSecondary::CheckConsistency() {
657
- mutex_.AssertHeld();
658
- Status s = DBImpl::CheckConsistency();
659
- // If DBImpl::CheckConsistency() which is stricter returns success, then we
660
- // do not need to give a second chance.
661
- if (s.ok()) {
662
- return s;
663
- }
664
- // It's possible that DBImpl::CheckConssitency() can fail because the primary
665
- // may have removed certain files, causing the GetFileSize(name) call to
666
- // fail and returning a PathNotFound. In this case, we take a best-effort
667
- // approach and just proceed.
668
- TEST_SYNC_POINT_CALLBACK(
669
- "DBImplSecondary::CheckConsistency:AfterFirstAttempt", &s);
670
-
671
- if (immutable_db_options_.skip_checking_sst_file_sizes_on_db_open) {
672
- return Status::OK();
673
- }
674
-
675
- std::vector<LiveFileMetaData> metadata;
676
- versions_->GetLiveFilesMetaData(&metadata);
677
-
678
- std::string corruption_messages;
679
- for (const auto& md : metadata) {
680
- // md.name has a leading "/".
681
- std::string file_path = md.db_path + md.name;
682
-
683
- uint64_t fsize = 0;
684
- s = env_->GetFileSize(file_path, &fsize);
685
- if (!s.ok() &&
686
- (env_->GetFileSize(Rocks2LevelTableFileName(file_path), &fsize).ok() ||
687
- s.IsPathNotFound())) {
688
- s = Status::OK();
689
- }
690
- if (!s.ok()) {
691
- corruption_messages +=
692
- "Can't access " + md.name + ": " + s.ToString() + "\n";
693
- }
694
- }
695
- return corruption_messages.empty() ? Status::OK()
696
- : Status::Corruption(corruption_messages);
697
- }
698
-
699
653
  Status DBImplSecondary::TryCatchUpWithPrimary() {
700
654
  assert(versions_.get() != nullptr);
701
655
  Status s;
@@ -894,7 +848,6 @@ Status DBImplSecondary::CompactWithoutInstallation(
894
848
 
895
849
  VersionStorageInfo* vstorage = version->storage_info();
896
850
 
897
- // Use comp_options to reuse some CompactFiles functions
898
851
  CompactionOptions comp_options;
899
852
  comp_options.compression = kDisableCompressionOption;
900
853
  comp_options.output_file_size_limit = MaxFileSizeForLevel(
@@ -913,13 +866,27 @@ Status DBImplSecondary::CompactWithoutInstallation(
913
866
  return s;
914
867
  }
915
868
 
869
+ const int job_id = next_job_id_.fetch_add(1);
870
+ JobContext job_context(job_id, true /*create_superversion*/);
871
+ std::vector<SequenceNumber> snapshots = input.snapshots;
872
+
873
+ // TODO - snapshot_checker support in Remote Compaction
874
+ job_context.InitSnapshotContext(/*checker=*/nullptr,
875
+ /*managed_snapshot=*/nullptr,
876
+ kMaxSequenceNumber, std::move(snapshots));
877
+
878
+ // TODO - consider serializing the entire Compaction object and using it as
879
+ // input instead of recreating it in the remote worker
916
880
  std::unique_ptr<Compaction> c;
917
881
  assert(cfd->compaction_picker());
918
882
  c.reset(cfd->compaction_picker()->CompactFiles(
919
883
  comp_options, input_files, input.output_level, vstorage,
920
- cfd->GetLatestMutableCFOptions(), mutable_db_options_, 0));
884
+ cfd->GetLatestMutableCFOptions(), mutable_db_options_, 0,
885
+ /*earliest_snapshot=*/job_context.snapshot_seqs.empty()
886
+ ? kMaxSequenceNumber
887
+ : job_context.snapshot_seqs.front(),
888
+ job_context.snapshot_checker));
921
889
  assert(c != nullptr);
922
-
923
890
  c->FinalizeInputInfo(version);
924
891
 
925
892
  // Create output directory if it's not existed yet
@@ -932,11 +899,6 @@ Status DBImplSecondary::CompactWithoutInstallation(
932
899
  LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL,
933
900
  immutable_db_options_.info_log.get());
934
901
 
935
- const int job_id = next_job_id_.fetch_add(1);
936
- JobContext job_context(0, true /*create_superversion*/);
937
- std::vector<SequenceNumber> snapshots = input.snapshots;
938
- job_context.InitSnapshotContext(nullptr, nullptr, kMaxSequenceNumber,
939
- std::move(snapshots));
940
902
  // use primary host's db_id for running the compaction, but db_session_id is
941
903
  // using the local one, which is to make sure the unique id is unique from
942
904
  // the remote compactors. Because the id is generated from db_id,
@@ -248,12 +248,6 @@ class DBImplSecondary : public DBImpl {
248
248
  Status MaybeInitLogReader(uint64_t log_number,
249
249
  log::FragmentBufferedReader** log_reader);
250
250
 
251
- // Check if all live files exist on file system and that their file sizes
252
- // matche to the in-memory records. It is possible that some live files may
253
- // have been deleted by the primary. In this case, CheckConsistency() does
254
- // not flag the missing file as inconsistency.
255
- Status CheckConsistency() override;
256
-
257
251
  #ifndef NDEBUG
258
252
  Status TEST_CompactWithoutInstallation(const OpenAndCompactOptions& options,
259
253
  ColumnFamilyHandle* cfh,
@@ -8,6 +8,8 @@
8
8
  // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
9
 
10
10
  #include <functional>
11
+ #include <iomanip>
12
+ #include <iostream>
11
13
 
12
14
  #include "db/arena_wrapped_db_iter.h"
13
15
  #include "db/db_iter.h"
@@ -4139,6 +4141,190 @@ TEST_P(DBIteratorTest, AverageMemtableOpsScanFlushTriggerByOverwrites) {
4139
4141
  ASSERT_OK(db_->WaitForCompact({}));
4140
4142
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
4141
4143
  }
4144
+
4145
+ class DBMultiScanIteratorTest : public DBTestBase {
4146
+ public:
4147
+ DBMultiScanIteratorTest()
4148
+ : DBTestBase("db_multi_scan_iterator_test", /*env_do_fsync=*/true) {}
4149
+ };
4150
+
4151
+ TEST_F(DBMultiScanIteratorTest, BasicTest) {
4152
+ // Create a file
4153
+ for (int i = 0; i < 100; ++i) {
4154
+ std::stringstream ss;
4155
+ ss << std::setw(2) << std::setfill('0') << i;
4156
+ ASSERT_OK(Put("k" + ss.str(), "val" + ss.str()));
4157
+ }
4158
+ ASSERT_OK(Flush());
4159
+
4160
+ std::vector<std::string> key_ranges({"k03", "k10", "k25", "k50"});
4161
+ ReadOptions ro;
4162
+ std::vector<ScanOptions> scan_options(
4163
+ {ScanOptions(key_ranges[0], key_ranges[1]),
4164
+ ScanOptions(key_ranges[2], key_ranges[3])});
4165
+ ColumnFamilyHandle* cfh = dbfull()->DefaultColumnFamily();
4166
+ std::unique_ptr<MultiScan> iter =
4167
+ dbfull()->NewMultiScan(ro, cfh, scan_options);
4168
+ try {
4169
+ int idx = 0;
4170
+ int count = 0;
4171
+ for (auto range : *iter) {
4172
+ for (auto it : range) {
4173
+ ASSERT_GE(it.first.ToString().compare(key_ranges[idx]), 0);
4174
+ ASSERT_LT(it.first.ToString().compare(key_ranges[idx + 1]), 0);
4175
+ count++;
4176
+ }
4177
+ idx += 2;
4178
+ }
4179
+ ASSERT_EQ(count, 32);
4180
+ } catch (MultiScanException& ex) {
4181
+ // Make sure exception contains the status
4182
+ ASSERT_NOK(ex.status());
4183
+ std::cerr << "Iterator returned status " << ex.what();
4184
+ abort();
4185
+ } catch (std::logic_error& ex) {
4186
+ std::cerr << "Iterator returned logic error " << ex.what();
4187
+ abort();
4188
+ }
4189
+ iter.reset();
4190
+
4191
+ // Test the overlapping scan case
4192
+ key_ranges[1] = "k30";
4193
+ scan_options[0] = ScanOptions(key_ranges[0], key_ranges[1]);
4194
+ iter = dbfull()->NewMultiScan(ro, cfh, scan_options);
4195
+ try {
4196
+ int idx = 0;
4197
+ int count = 0;
4198
+ for (auto range : *iter) {
4199
+ for (auto it : range) {
4200
+ ASSERT_GE(it.first.ToString().compare(key_ranges[idx]), 0);
4201
+ ASSERT_LT(it.first.ToString().compare(key_ranges[idx + 1]), 0);
4202
+ count++;
4203
+ }
4204
+ idx += 2;
4205
+ }
4206
+ ASSERT_EQ(count, 52);
4207
+ } catch (MultiScanException& ex) {
4208
+ // Make sure exception contains the status
4209
+ ASSERT_NOK(ex.status());
4210
+ std::cerr << "Iterator returned status " << ex.what();
4211
+ abort();
4212
+ } catch (std::logic_error& ex) {
4213
+ std::cerr << "Iterator returned logic error " << ex.what();
4214
+ abort();
4215
+ }
4216
+ iter.reset();
4217
+
4218
+ // Test the no limit scan case
4219
+ scan_options[0] = ScanOptions(key_ranges[0]);
4220
+ scan_options[1] = ScanOptions(key_ranges[2]);
4221
+ iter = dbfull()->NewMultiScan(ro, cfh, scan_options);
4222
+ try {
4223
+ int idx = 0;
4224
+ int count = 0;
4225
+ for (auto range : *iter) {
4226
+ for (auto it : range) {
4227
+ ASSERT_GE(it.first.ToString().compare(key_ranges[idx]), 0);
4228
+ if (it.first.ToString().compare(key_ranges[idx + 1]) == 0) {
4229
+ break;
4230
+ }
4231
+ count++;
4232
+ }
4233
+ idx += 2;
4234
+ }
4235
+ ASSERT_EQ(count, 52);
4236
+ } catch (MultiScanException& ex) {
4237
+ // Make sure exception contains the status
4238
+ ASSERT_NOK(ex.status());
4239
+ std::cerr << "Iterator returned status " << ex.what();
4240
+ abort();
4241
+ } catch (std::logic_error& ex) {
4242
+ std::cerr << "Iterator returned logic error " << ex.what();
4243
+ abort();
4244
+ }
4245
+ iter.reset();
4246
+ }
4247
+
4248
+ TEST_F(DBMultiScanIteratorTest, MixedBoundsTest) {
4249
+ // Create a file
4250
+ for (int i = 0; i < 100; ++i) {
4251
+ std::stringstream ss;
4252
+ ss << std::setw(2) << std::setfill('0') << i;
4253
+ ASSERT_OK(Put("k" + ss.str(), "val" + ss.str()));
4254
+ }
4255
+ ASSERT_OK(Flush());
4256
+
4257
+ std::vector<std::string> key_ranges(
4258
+ {"k03", "k10", "k25", "k50", "k75", "k90"});
4259
+ ReadOptions ro;
4260
+ std::vector<ScanOptions> scan_options(
4261
+ {ScanOptions(key_ranges[0], key_ranges[1]), ScanOptions(key_ranges[2]),
4262
+ ScanOptions(key_ranges[4], key_ranges[5])});
4263
+ ColumnFamilyHandle* cfh = dbfull()->DefaultColumnFamily();
4264
+ std::unique_ptr<MultiScan> iter =
4265
+ dbfull()->NewMultiScan(ro, cfh, scan_options);
4266
+ try {
4267
+ int idx = 0;
4268
+ int count = 0;
4269
+ for (auto range : *iter) {
4270
+ for (auto it : range) {
4271
+ ASSERT_GE(it.first.ToString().compare(
4272
+ scan_options[idx].range.start->ToString()),
4273
+ 0);
4274
+ if (scan_options[idx].range.limit) {
4275
+ ASSERT_LT(it.first.ToString().compare(
4276
+ scan_options[idx].range.limit->ToString()),
4277
+ 0);
4278
+ }
4279
+ count++;
4280
+ }
4281
+ idx++;
4282
+ }
4283
+ ASSERT_EQ(count, 97);
4284
+ } catch (MultiScanException& ex) {
4285
+ // Make sure exception contains the status
4286
+ ASSERT_NOK(ex.status());
4287
+ std::cerr << "Iterator returned status " << ex.what();
4288
+ abort();
4289
+ } catch (std::logic_error& ex) {
4290
+ std::cerr << "Iterator returned logic error " << ex.what();
4291
+ abort();
4292
+ }
4293
+ iter.reset();
4294
+
4295
+ scan_options[0] = ScanOptions(key_ranges[0]);
4296
+ scan_options[1] = ScanOptions(key_ranges[2], key_ranges[3]);
4297
+ scan_options[2] = ScanOptions(key_ranges[4]);
4298
+ iter = dbfull()->NewMultiScan(ro, cfh, scan_options);
4299
+ try {
4300
+ int idx = 0;
4301
+ int count = 0;
4302
+ for (auto range : *iter) {
4303
+ for (auto it : range) {
4304
+ ASSERT_GE(it.first.ToString().compare(
4305
+ scan_options[idx].range.start->ToString()),
4306
+ 0);
4307
+ if (scan_options[idx].range.limit) {
4308
+ ASSERT_LT(it.first.ToString().compare(
4309
+ scan_options[idx].range.limit->ToString()),
4310
+ 0);
4311
+ }
4312
+ count++;
4313
+ }
4314
+ idx++;
4315
+ }
4316
+ ASSERT_EQ(count, 147);
4317
+ } catch (MultiScanException& ex) {
4318
+ // Make sure exception contains the status
4319
+ ASSERT_NOK(ex.status());
4320
+ std::cerr << "Iterator returned status " << ex.what();
4321
+ abort();
4322
+ } catch (std::logic_error& ex) {
4323
+ std::cerr << "Iterator returned logic error " << ex.what();
4324
+ abort();
4325
+ }
4326
+ iter.reset();
4327
+ }
4142
4328
  } // namespace ROCKSDB_NAMESPACE
4143
4329
 
4144
4330
  int main(int argc, char** argv) {