@nxtedition/rocksdb 14.0.0 → 15.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/binding.cc +52 -179
  2. package/deps/rocksdb/rocksdb/BUCK +7 -0
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +29 -14
  4. package/deps/rocksdb/rocksdb/Directory.Build.props +9 -0
  5. package/deps/rocksdb/rocksdb/Makefile +6 -1
  6. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +4 -4
  7. package/deps/rocksdb/rocksdb/ccache_msvc_compiler.bat +1 -0
  8. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +17 -3
  9. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -3
  10. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +10 -0
  11. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +522 -60
  12. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +69 -10
  13. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +443 -0
  14. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
  15. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +14 -3
  16. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +5 -5
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +3 -6
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +1 -1
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +28 -5
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +4 -4
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +6 -3
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +455 -98
  23. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +4 -2
  24. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +13 -1
  25. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +146 -0
  26. package/deps/rocksdb/rocksdb/db/db_follower_test.cc +2 -2
  27. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +6 -0
  28. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +5 -2
  29. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +18 -19
  30. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -0
  31. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +665 -14
  32. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +83 -0
  33. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +68 -0
  34. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +101 -0
  35. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +44 -0
  36. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +1 -2
  37. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +22 -5
  38. package/deps/rocksdb/rocksdb/db/log_reader.h +4 -4
  39. package/deps/rocksdb/rocksdb/db/log_writer.h +1 -1
  40. package/deps/rocksdb/rocksdb/db/merge_helper.h +1 -1
  41. package/deps/rocksdb/rocksdb/db/version_edit.cc +477 -139
  42. package/deps/rocksdb/rocksdb/db/version_edit.h +228 -8
  43. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +333 -0
  44. package/deps/rocksdb/rocksdb/db/write_thread.h +1 -1
  45. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
  46. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +247 -32
  47. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +3 -0
  48. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.cc +61 -0
  49. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.h +17 -28
  50. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +16 -0
  51. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +6 -1
  52. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +46 -18
  53. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +18 -1
  54. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +8 -7
  55. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +4 -4
  56. package/deps/rocksdb/rocksdb/env/fs_posix.cc +1 -0
  57. package/deps/rocksdb/rocksdb/file/filename.cc +40 -0
  58. package/deps/rocksdb/rocksdb/file/filename.h +14 -1
  59. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +4 -3
  60. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +2 -1
  61. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +26 -7
  62. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -3
  63. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +59 -0
  64. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
  65. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +24 -0
  66. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +2 -1
  67. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +4 -0
  68. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +13 -8
  69. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +1 -0
  70. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +2 -0
  71. package/deps/rocksdb/rocksdb/options/options_test.cc +5 -0
  72. package/deps/rocksdb/rocksdb/src.mk +2 -0
  73. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +73 -16
  74. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +10 -5
  75. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +32 -0
  76. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +18 -27
  77. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +0 -3
  78. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +5 -1
  79. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +708 -217
  80. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +11 -6
  81. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +5 -3
  82. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +27 -19
  83. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +24 -6
  84. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +51 -18
  85. package/deps/rocksdb/rocksdb/table/block_based/index_builder_test.cc +183 -0
  86. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +4 -2
  87. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.h +0 -2
  88. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +8 -3
  89. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +3 -1
  90. package/deps/rocksdb/rocksdb/table/table_test.cc +222 -36
  91. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +246 -6
  92. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +86 -0
  93. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +21 -0
  94. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -0
  95. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +1 -1
  96. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -0
  97. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +0 -2
  98. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +12 -12
  99. package/index.js +27 -37
  100. package/package.json +1 -1
  101. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  102. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
@@ -87,6 +87,7 @@ namespace ROCKSDB_NAMESPACE {
87
87
  namespace {
88
88
 
89
89
  const std::string kDummyValue(10000, 'o');
90
+ constexpr auto kVerbose = false;
90
91
 
91
92
  // DummyPropertiesCollector used to test BlockBasedTableProperties
92
93
  class DummyPropertiesCollector : public TablePropertiesCollector {
@@ -934,7 +935,6 @@ class HarnessTest : public testing::Test {
934
935
 
935
936
  void TestRandomAccess(Random* rnd, const std::vector<std::string>& keys,
936
937
  const stl_wrappers::KVMap& data) {
937
- static const bool kVerbose = false;
938
938
  InternalIterator* iter = constructor_->NewIterator();
939
939
  ASSERT_TRUE(!iter->Valid());
940
940
  stl_wrappers::KVMap::const_iterator model_iter = data.begin();
@@ -1140,15 +1140,20 @@ class TableTest : public testing::Test {
1140
1140
 
1141
1141
  class GeneralTableTest : public TableTest {};
1142
1142
  class BlockBasedTableTestBase : public TableTest {};
1143
- class BlockBasedTableTest
1144
- : public BlockBasedTableTestBase,
1145
- virtual public ::testing::WithParamInterface<uint32_t> {
1143
+ class BlockBasedTableTest : public BlockBasedTableTestBase,
1144
+ virtual public ::testing::WithParamInterface<
1145
+ std::tuple<uint32_t, size_t, size_t>> {
1146
1146
  public:
1147
- BlockBasedTableTest() : format_(GetParam()) { env_ = Env::Default(); }
1147
+ BlockBasedTableTest() : format_(std::get<0>(GetParam())) {
1148
+ env_ = Env::Default();
1149
+ }
1148
1150
 
1149
1151
  BlockBasedTableOptions GetBlockBasedTableOptions() {
1150
1152
  BlockBasedTableOptions options;
1151
1153
  options.format_version = format_;
1154
+ auto param = GetParam();
1155
+ options.super_block_alignment_size = std::get<1>(param);
1156
+ options.super_block_alignment_space_overhead_ratio = std::get<2>(param);
1152
1157
  return options;
1153
1158
  }
1154
1159
 
@@ -1380,8 +1385,12 @@ class FileChecksumTestHelper {
1380
1385
 
1381
1386
  uint64_t FileChecksumTestHelper::checksum_file_num_ = 1;
1382
1387
 
1383
- INSTANTIATE_TEST_CASE_P(FormatVersions, BlockBasedTableTest,
1384
- testing::ValuesIn(test::kFooterFormatVersionsToTest));
1388
+ INSTANTIATE_TEST_CASE_P(
1389
+ FormatVersions, BlockBasedTableTest,
1390
+ testing::Combine(testing::ValuesIn(test::kFooterFormatVersionsToTest),
1391
+ testing::Values(0, 128 * 1024, 512 * 1024,
1392
+ 2 * 1024 * 1024),
1393
+ testing::Values(2048, 32, 128)));
1385
1394
 
1386
1395
  // This test serves as the living tutorial for the prefix scan of user collected
1387
1396
  // properties.
@@ -7827,7 +7836,6 @@ class UserDefinedIndexTestBase : public BlockBasedTableTestBase {
7827
7836
  read_opts.iterate_upper_bound = &ub;
7828
7837
  std::unique_ptr<Iterator> iter(db->NewIterator(read_opts, cfh));
7829
7838
  iter->Prepare(scan_opts);
7830
- static const bool kVerbose = false;
7831
7839
  for (auto opt : opts) {
7832
7840
  ub = opt.range.limit.value();
7833
7841
  iter->Seek(opt.range.start.value());
@@ -8979,8 +8987,6 @@ std::ostream& operator<<(std::ostream& os,
8979
8987
  << param.enable_compaction_with_sst_partitioner << "}";
8980
8988
  }
8981
8989
 
8982
- constexpr auto kVerbose = false;
8983
-
8984
8990
  struct DataRange {
8985
8991
  size_t start; // inclusive
8986
8992
  size_t end; // exclusive
@@ -9131,23 +9137,47 @@ class UserDefinedIndexStressTest
9131
9137
  }
9132
9138
 
9133
9139
  void CreateSstFileWithRanges(const std::string& ingest_file,
9134
- const DataRange& range) {
9135
- std::unique_ptr<SstFileWriter> writer =
9136
- std::make_unique<SstFileWriter>(EnvOptions(), options_);
9137
- ASSERT_OK(writer->Open(ingest_file));
9140
+ const std::vector<DataRange>& ranges,
9141
+ bool& data_added) {
9142
+ std::unique_ptr<SstFileWriter> writer;
9143
+ data_added = false;
9138
9144
 
9139
- assert(range.start != range.end);
9145
+ std::vector<DataRange> ranges_in_file;
9140
9146
 
9141
- if (range.is_range_delete) {
9142
- ASSERT_OK(writer->DeleteRange(range.start_key, range.end_key));
9143
- } else {
9144
- for (size_t i = range.start; i != range.end;) {
9145
- auto key = FormatKey(i);
9146
- range.start < range.end ? i++ : i--;
9147
- ASSERT_OK(writer->Put(key, range.value));
9147
+ for (auto const& range : ranges) {
9148
+ assert(range.start != range.end);
9149
+ if (range.skipped) {
9150
+ continue;
9151
+ }
9152
+
9153
+ if (writer == nullptr) {
9154
+ writer = std::make_unique<SstFileWriter>(EnvOptions(), options_);
9155
+ ASSERT_OK(writer->Open(ingest_file));
9156
+ }
9157
+ ranges_in_file.push_back(range);
9158
+
9159
+ data_added = true;
9160
+
9161
+ if (range.is_range_delete) {
9162
+ ASSERT_OK(writer->DeleteRange(range.start_key, range.end_key));
9163
+ } else {
9164
+ for (size_t i = range.start; i != range.end;) {
9165
+ auto key = FormatKey(i);
9166
+ range.start < range.end ? i++ : i--;
9167
+ ASSERT_OK(writer->Put(key, range.value));
9168
+ }
9148
9169
  }
9149
9170
  }
9150
- ASSERT_OK(writer->Finish()) << range.ToString();
9171
+ if (kVerbose) {
9172
+ std::cout << "Ingested file: " + ingest_file + "; Range: {" << std::endl;
9173
+ for (const auto& range : ranges_in_file) {
9174
+ std::cout << " " << range.ToString() << "," << std::endl;
9175
+ }
9176
+ std::cout << "}" << std::endl;
9177
+ }
9178
+ if (data_added) {
9179
+ ASSERT_OK(writer->Finish());
9180
+ }
9151
9181
  }
9152
9182
 
9153
9183
  void RangeScan(std::unique_ptr<Iterator>& iter,
@@ -9267,17 +9297,42 @@ class UserDefinedIndexStressTest
9267
9297
  void IngestFilesInOneLevel(const std::vector<DataRange>& ranges_in_level,
9268
9298
  const std::string& ingest_file_name_prefix,
9269
9299
  size_t& ingest_file_count,
9270
- const IngestExternalFileOptions& ifo) {
9300
+ const IngestExternalFileOptions& ifo,
9301
+ bool combine_ranges = false) {
9271
9302
  std::vector<std::string> ingest_files;
9272
9303
  // Generate SST file and bulk load them one level at a time
9273
- for (auto const& range : ranges_in_level) {
9274
- if (!range.skipped) {
9304
+ if (combine_ranges) {
9305
+ size_t i = 0;
9306
+ while (i < ranges_in_level.size()) {
9307
+ // if combine ranges, generate 1 SST file that combines multiple ranges
9308
+ // together
9309
+ size_t batch_end_idx =
9310
+ std::min(i + rnd.Uniform(3) + 2, ranges_in_level.size());
9311
+ bool data_added = false;
9275
9312
  ASSERT_NO_FATAL_FAILURE(CreateSstFileWithRanges(
9276
9313
  ingest_file_name_prefix + std::to_string(ingest_file_count),
9277
- range));
9278
- ingest_files.push_back(ingest_file_name_prefix +
9279
- std::to_string(ingest_file_count));
9280
- ingest_file_count++;
9314
+ {ranges_in_level.begin() + i,
9315
+ ranges_in_level.begin() + batch_end_idx},
9316
+ data_added));
9317
+ if (data_added) {
9318
+ ingest_files.push_back(ingest_file_name_prefix +
9319
+ std::to_string(ingest_file_count));
9320
+ ingest_file_count++;
9321
+ }
9322
+ i = batch_end_idx;
9323
+ }
9324
+ } else {
9325
+ for (auto const& range : ranges_in_level) {
9326
+ if (!range.skipped) {
9327
+ bool data_added = false;
9328
+ ASSERT_NO_FATAL_FAILURE(CreateSstFileWithRanges(
9329
+ ingest_file_name_prefix + std::to_string(ingest_file_count),
9330
+ {range}, data_added));
9331
+ ASSERT_TRUE(data_added);
9332
+ ingest_files.push_back(ingest_file_name_prefix +
9333
+ std::to_string(ingest_file_count));
9334
+ ingest_file_count++;
9335
+ }
9281
9336
  }
9282
9337
  }
9283
9338
 
@@ -9314,9 +9369,9 @@ class UserDefinedIndexStressTest
9314
9369
  // Because query count == 2, level n+1 would only prepare 3-5. but since 4-6
9315
9370
  // got deleted in the upper level, they are not returned, so only 3 is
9316
9371
  // returned. Meantime the query should have return [3, 6]
9317
- // One way to fix this is by preparing more data blocks once prepared blocks are
9318
- // exhausted, but upper bound is not reached yet.
9319
- // This requires following changes:
9372
+ // One way to fix this is by preparing more data blocks once prepared blocks
9373
+ // are exhausted, but upper bound is not reached yet. This requires following
9374
+ // changes:
9320
9375
  // 1. Fix out of bound flag in block table iterator. Only set it if the key is
9321
9376
  // larger than the upper bound.
9322
9377
  // 2. Refactor the prepared block single dimension vector into 2 dimension of
@@ -9349,12 +9404,71 @@ TEST_P(UserDefinedIndexStressTest, DISABLED_PartialDeleteRange) {
9349
9404
  ASSERT_NO_FATAL_FAILURE(ValidateQueryResult());
9350
9405
  }
9351
9406
 
9407
+ TEST_P(UserDefinedIndexStressTest, DeleteRangeMixedWithDataFile) {
9408
+ // Create 2 column families. One use normal put/del, the other uses sst
9409
+ // ingest.
9410
+ // Test the case where there are 3 levels, the middle level is a delete
9411
+ // range file that span across the entire key space. The top level file have
9412
+ // multiple files and each one has both data and delete range. Scan same
9413
+ // range between the 2 CF and validate the result is same
9414
+ SCOPED_TRACE("Start with random seed: " + std::to_string(rand_seed_));
9415
+ dbname_ = test::PerThreadDBPath(
9416
+ "UserDefinedIndexStressTest_DeleteRangeMixedWithDataFile");
9417
+ SCOPED_TRACE("dbname: " + dbname_);
9418
+ ASSERT_NO_FATAL_FAILURE(SetupDB(dbname_));
9419
+
9420
+ // Test 3 levels.
9421
+ // bottom level is normal data files.
9422
+ ranges_in_levels_.push_back(GenerateKeyRanges(rnd.Uniform(3) + 4, 2, "L6"));
9423
+ // middle level delete range between each level
9424
+ if (is_reverse_comparator_) {
9425
+ ranges_in_levels_.push_back({{.start = 100,
9426
+ .end = 0,
9427
+ .is_range_delete = true,
9428
+ .skipped = false,
9429
+ .start_key = "keyz",
9430
+ .end_key = "key"}});
9431
+ } else {
9432
+ ranges_in_levels_.push_back({{.start = 0,
9433
+ .end = 100,
9434
+ .is_range_delete = true,
9435
+ .skipped = false,
9436
+ .start_key = "key",
9437
+ .end_key = "keyz"}});
9438
+ }
9439
+
9440
+ // Top level is normal data files
9441
+ ranges_in_levels_.push_back(GenerateKeyRanges(rnd.Uniform(3) + 4, 2, "L4"));
9442
+
9443
+ IngestExternalFileOptions ifo;
9444
+ ifo.snapshot_consistency = false;
9445
+ auto ingest_file_name_prefix = dbname_ + "ingest_file_";
9446
+ size_t ingest_file_count = 0;
9447
+ auto first_level = true;
9448
+ for (auto const& ranges_in_level : ranges_in_levels_) {
9449
+ ASSERT_NO_FATAL_FAILURE(
9450
+ IngestFilesInOneLevel(ranges_in_level, ingest_file_name_prefix,
9451
+ ingest_file_count, ifo, true));
9452
+ if (first_level) {
9453
+ first_level = false;
9454
+ if (enable_compaction_with_sst_partitioner_) {
9455
+ // When compaction is enabled, do a compaction at the first level
9456
+ ASSERT_NO_FATAL_FAILURE(CompactIngestedCF());
9457
+ }
9458
+ }
9459
+ }
9460
+
9461
+ ASSERT_NO_FATAL_FAILURE(AddDataToRegularCF());
9462
+
9463
+ ASSERT_NO_FATAL_FAILURE(ValidateQueryResult());
9464
+ }
9465
+
9352
9466
  TEST_P(UserDefinedIndexStressTest, DeleteRange) {
9353
9467
  // Create 2 column families. One use normal put/del, the other uses sst
9354
9468
  // ingest.
9355
- // Test the case where there are 3 levels, the middle level is a delete range
9356
- // file that span across the entire key space.
9357
- // Range scan same range between the 2 CF and validate the result is same
9469
+ // Test the case where there are 3 levels, the middle level is a delete
9470
+ // range file that span across the entire key space. Range scan same range
9471
+ // between the 2 CF and validate the result is same
9358
9472
  SCOPED_TRACE("Start with random seed: " + std::to_string(rand_seed_));
9359
9473
  dbname_ = test::PerThreadDBPath("UserDefinedIndexStressTest_DeleteRange");
9360
9474
  SCOPED_TRACE("dbname: " + dbname_);
@@ -9404,6 +9518,78 @@ TEST_P(UserDefinedIndexStressTest, DeleteRange) {
9404
9518
  ASSERT_NO_FATAL_FAILURE(ValidateQueryResult());
9405
9519
  }
9406
9520
 
9521
+ TEST_P(UserDefinedIndexStressTest, AtomicReplaceBulkLoad) {
9522
+ // Create 2 column families. One use normal put/del, the other uses sst
9523
+ // ingest.
9524
+ // Test the case where there are 3 levels, the middle level is a delete
9525
+ // range file that span across the entire key space. Range scan same range
9526
+ // between the 2 CF and validate the result is same
9527
+ SCOPED_TRACE("Start with random seed: " + std::to_string(rand_seed_));
9528
+ dbname_ = test::PerThreadDBPath("UserDefinedIndexStressTest_DeleteRange");
9529
+ SCOPED_TRACE("dbname: " + dbname_);
9530
+ ASSERT_NO_FATAL_FAILURE(SetupDB(dbname_));
9531
+
9532
+ // Test 3 levels.
9533
+ // bottom level is normal data files.
9534
+ ranges_in_levels_.push_back(GenerateKeyRanges(rnd.Uniform(3) + 4, 2, "L6"));
9535
+ // middle level delete range between each level
9536
+ if (is_reverse_comparator_) {
9537
+ ranges_in_levels_.push_back({{.start = 100,
9538
+ .end = 0,
9539
+ .is_range_delete = true,
9540
+ .skipped = false,
9541
+ .start_key = "keyz",
9542
+ .end_key = "key"}});
9543
+ } else {
9544
+ ranges_in_levels_.push_back({{.start = 0,
9545
+ .end = 100,
9546
+ .is_range_delete = true,
9547
+ .skipped = false,
9548
+ .start_key = "key",
9549
+ .end_key = "keyz"}});
9550
+ }
9551
+ // Top level is normal data files
9552
+ ranges_in_levels_.push_back(GenerateKeyRanges(rnd.Uniform(3) + 4, 2, "L4"));
9553
+
9554
+ IngestExternalFileOptions ifo;
9555
+ ifo.snapshot_consistency = false;
9556
+ auto ingest_file_name_prefix = dbname_ + "ingest_file_";
9557
+ size_t ingest_file_count = 0;
9558
+ auto first_level = true;
9559
+ for (auto const& ranges_in_level : ranges_in_levels_) {
9560
+ ASSERT_NO_FATAL_FAILURE(IngestFilesInOneLevel(
9561
+ ranges_in_level, ingest_file_name_prefix, ingest_file_count, ifo));
9562
+ if (first_level) {
9563
+ first_level = false;
9564
+ if (enable_compaction_with_sst_partitioner_) {
9565
+ // When compaction is enabled, do a compaction at the first level
9566
+ ASSERT_NO_FATAL_FAILURE(CompactIngestedCF());
9567
+ }
9568
+ }
9569
+ }
9570
+
9571
+ // Ingest a new file with atomic replace with full key space, this layer
9572
+ // is exactly same as the one at Level 4
9573
+ bool data_added;
9574
+ ASSERT_NO_FATAL_FAILURE(CreateSstFileWithRanges(
9575
+ ingest_file_name_prefix + std::to_string(++ingest_file_count),
9576
+ ranges_in_levels_[2], data_added));
9577
+
9578
+ IngestExternalFileArg ingest_arg;
9579
+ ingest_arg.column_family = ingest_cfh_;
9580
+ ingest_arg.options = ifo;
9581
+ ingest_arg.external_files.push_back(ingest_file_name_prefix +
9582
+ std::to_string(ingest_file_count));
9583
+ ingest_arg.atomic_replace_range = RangeOpt(nullptr, nullptr);
9584
+
9585
+ ASSERT_OK(db_->IngestExternalFiles(
9586
+ std::vector<IngestExternalFileArg>({ingest_arg})));
9587
+
9588
+ ASSERT_NO_FATAL_FAILURE(AddDataToRegularCF());
9589
+
9590
+ ASSERT_NO_FATAL_FAILURE(ValidateQueryResult());
9591
+ }
9592
+
9407
9593
  INSTANTIATE_TEST_CASE_P(
9408
9594
  UserDefinedIndexStressTest, UserDefinedIndexStressTest,
9409
9595
  testing::Combine(testing::Values(BytewiseComparator(),
@@ -159,6 +159,7 @@ DEFINE_string(
159
159
  "readrandomoperands,"
160
160
  "backup,"
161
161
  "restore,"
162
+ "openandcompact,"
162
163
  "approximatememtablestats",
163
164
 
164
165
  "Comma-separated list of operations to run in the specified"
@@ -230,6 +231,9 @@ DEFINE_string(
230
231
  "\tcompact1 -- compact L1 into L2\n"
231
232
  "\twaitforcompaction - pause until compaction is (probably) done\n"
232
233
  "\tflush - flush the memtable\n"
234
+ "\topenandcompact -- Open DB and compact all files to bottommost level, "
235
+ "writing output to separate directory without modifying source DB. "
236
+ "Designed for remote compaction service testing\n"
233
237
  "\tstats -- Print DB stats\n"
234
238
  "\tresetstats -- Reset DB stats\n"
235
239
  "\tlevelstats -- Print the number of files and bytes per level\n"
@@ -717,6 +721,16 @@ DEFINE_bool(block_align,
717
721
  ROCKSDB_NAMESPACE::BlockBasedTableOptions().block_align,
718
722
  "Align data blocks on page size");
719
723
 
724
+ DEFINE_uint64(
725
+ super_block_alignment_size,
726
+ ROCKSDB_NAMESPACE::BlockBasedTableOptions().super_block_alignment_size,
727
+ "Configure super block size");
728
+
729
+ DEFINE_uint64(super_block_alignment_space_overhead_ratio,
730
+ ROCKSDB_NAMESPACE::BlockBasedTableOptions()
731
+ .super_block_alignment_space_overhead_ratio,
732
+ "Configure space overhead for super block alignment");
733
+
720
734
  DEFINE_int64(prepopulate_block_cache, 0,
721
735
  "Pre-populate hot/warm blocks in block cache. 0 to disable and 1 "
722
736
  "to insert during flush");
@@ -1862,6 +1876,18 @@ DEFINE_bool(
1862
1876
  .use_async_io,
1863
1877
  "Sets MultiScanArgs::use_async_io");
1864
1878
 
1879
+ DEFINE_bool(openandcompact_allow_resumption, false,
1880
+ "Whether to keep existing progress and enable resume compaction in "
1881
+ "OpenAndCompact benchmark");
1882
+
1883
+ DEFINE_bool(openandcompact_test_cancel_on_odd, false,
1884
+ "During OpenAndCompact[Xn], odd runs gets cancelled "
1885
+ "after specified `openandcompact_cancel_after_millseconds`");
1886
+
1887
+ DEFINE_uint32(openandcompact_cancel_after_millseconds, 1,
1888
+ "Time to wait before cancelling compaction in odd runs when "
1889
+ "openandcompact_test_cancel_on_odd is true");
1890
+
1865
1891
  namespace ROCKSDB_NAMESPACE {
1866
1892
  namespace {
1867
1893
  static Status CreateMemTableRepFactory(
@@ -2615,24 +2641,33 @@ class CombinedStats {
2615
2641
  const char* name = bench_name.c_str();
2616
2642
  int num_runs = static_cast<int>(throughput_ops_.size());
2617
2643
 
2644
+ double avg_ops_per_sec = CalcAvg(throughput_ops_);
2645
+ double avg_millis_per_op =
2646
+ (avg_ops_per_sec > 0) ? (1000.0 / avg_ops_per_sec) : 0;
2647
+
2648
+ printf("\n");
2649
+
2618
2650
  if (throughput_mbs_.size() == throughput_ops_.size()) {
2619
2651
  // \xC2\xB1 is +/- character in UTF-8
2620
2652
  fprintf(stdout,
2621
- "%s [AVG %d runs] : %d (\xC2\xB1 %d) ops/sec; %6.1f (\xC2\xB1 "
2653
+ "%s [AVG %d runs] : %d (\xC2\xB1 %d) ops/sec; %.3f ms/op; "
2654
+ "%6.1f (\xC2\xB1 "
2622
2655
  "%.1f) MB/sec\n"
2623
2656
  "%s [MEDIAN %d runs] : %d ops/sec; %6.1f MB/sec\n",
2624
2657
  name, num_runs, static_cast<int>(CalcAvg(throughput_ops_)),
2625
2658
  static_cast<int>(CalcConfidence95(throughput_ops_)),
2626
- CalcAvg(throughput_mbs_), CalcConfidence95(throughput_mbs_), name,
2627
- num_runs, static_cast<int>(CalcMedian(throughput_ops_)),
2659
+ avg_millis_per_op, CalcAvg(throughput_mbs_),
2660
+ CalcConfidence95(throughput_mbs_), name, num_runs,
2661
+ static_cast<int>(CalcMedian(throughput_ops_)),
2628
2662
  CalcMedian(throughput_mbs_));
2629
2663
  } else {
2630
2664
  fprintf(stdout,
2631
- "%s [AVG %d runs] : %d (\xC2\xB1 %d) ops/sec\n"
2665
+ "%s [AVG %d runs] : %d (\xC2\xB1 %d) ops/sec; %.3f ms/op\n"
2632
2666
  "%s [MEDIAN %d runs] : %d ops/sec\n",
2633
2667
  name, num_runs, static_cast<int>(CalcAvg(throughput_ops_)),
2634
- static_cast<int>(CalcConfidence95(throughput_ops_)), name,
2635
- num_runs, static_cast<int>(CalcMedian(throughput_ops_)));
2668
+ static_cast<int>(CalcConfidence95(throughput_ops_)),
2669
+ avg_millis_per_op, name, num_runs,
2670
+ static_cast<int>(CalcMedian(throughput_ops_)));
2636
2671
  }
2637
2672
  }
2638
2673
 
@@ -2791,6 +2826,8 @@ class Duration {
2791
2826
  uint64_t start_at_;
2792
2827
  };
2793
2828
 
2829
+ // Global run counter for cancel/resume-OpenAndCompact() testing
2830
+ static std::atomic<int> openandcompact_run_counter{0};
2794
2831
  class Benchmark {
2795
2832
  private:
2796
2833
  std::shared_ptr<Cache> cache_;
@@ -3843,6 +3880,9 @@ class Benchmark {
3843
3880
  method = &Benchmark::Backup;
3844
3881
  } else if (name == "restore") {
3845
3882
  method = &Benchmark::Restore;
3883
+ } else if (name == "openandcompact") {
3884
+ fresh_db = false;
3885
+ method = &Benchmark::OpenAndCompact;
3846
3886
  } else if (!name.empty()) { // No error message for empty name
3847
3887
  fprintf(stderr, "unknown benchmark '%s'\n", name.c_str());
3848
3888
  ErrorExit();
@@ -5172,6 +5212,206 @@ class Benchmark {
5172
5212
  DoWrite(thread, UNIQUE_RANDOM);
5173
5213
  }
5174
5214
 
5215
+ void OpenAndCompact(ThreadState* thread) {
5216
+ if (thread->tid != 0) {
5217
+ return;
5218
+ }
5219
+
5220
+ int current_run = ++openandcompact_run_counter;
5221
+ bool is_odd_run = (current_run % 2 == 1);
5222
+
5223
+ if (FLAGS_openandcompact_test_cancel_on_odd) {
5224
+ const char* even_description = FLAGS_openandcompact_allow_resumption
5225
+ ? "even - resume"
5226
+ : "even - normal";
5227
+ fprintf(stdout, "\n--- Run %d (%s) ---\n", current_run,
5228
+ is_odd_run ? "odd - will cancel" : even_description);
5229
+ }
5230
+
5231
+ Status create_status =
5232
+ db_.db->GetEnv()->CreateDirIfMissing(FLAGS_secondary_path);
5233
+ if (!create_status.ok()) {
5234
+ fprintf(stderr, "Failed to create secondary path: %s\n",
5235
+ create_status.ToString().c_str());
5236
+ return;
5237
+ }
5238
+
5239
+ std::string options_file;
5240
+ Status options_status =
5241
+ GetLatestOptionsFileName(FLAGS_db, db_.db->GetEnv(), &options_file);
5242
+ if (!options_status.ok()) {
5243
+ fprintf(stderr, "FAILED: Cannot find OPTIONS file in %s: %s\n",
5244
+ FLAGS_db.c_str(), options_status.ToString().c_str());
5245
+ return;
5246
+ }
5247
+
5248
+ uint64_t options_file_number;
5249
+ FileType type;
5250
+ if (!ParseFileName(options_file, &options_file_number, &type) ||
5251
+ type != kOptionsFile) {
5252
+ fprintf(stderr, "FAILED: Cannot parse OPTIONS file number from %s\n",
5253
+ options_file.c_str());
5254
+ return;
5255
+ }
5256
+
5257
+ CompactionServiceInput compaction_input;
5258
+ compaction_input.cf_name = kDefaultColumnFamilyName;
5259
+
5260
+ std::vector<std::string> input_file_names;
5261
+ ColumnFamilyMetaData cf_meta;
5262
+ db_.db->GetColumnFamilyMetaData(&cf_meta);
5263
+
5264
+ uint64_t total_input_keys = 0;
5265
+ uint64_t total_input_files = 0;
5266
+
5267
+ // Collect files from all levels for full compaction
5268
+ for (const auto& level : cf_meta.levels) {
5269
+ for (const auto& file : level.files) {
5270
+ input_file_names.push_back(file.name);
5271
+ total_input_keys += file.num_entries;
5272
+ total_input_files++;
5273
+ }
5274
+ }
5275
+
5276
+ // Set output level to configured bottom level (num_levels - 1)
5277
+ compaction_input.output_level = FLAGS_num_levels - 1;
5278
+ compaction_input.db_id = "db_bench_openandcompact";
5279
+ compaction_input.options_file_number = options_file_number;
5280
+
5281
+ compaction_input.input_files = input_file_names;
5282
+
5283
+ std::string input_string;
5284
+ Status serialize_status = compaction_input.Write(&input_string);
5285
+ if (!serialize_status.ok()) {
5286
+ fprintf(stderr, "FAILED: Cannot serialize compaction input: %s\n",
5287
+ serialize_status.ToString().c_str());
5288
+ return;
5289
+ }
5290
+
5291
+ fprintf(stdout, "\nInput files: %" PRIu64 " files, %" PRIu64 " keys\n",
5292
+ total_input_files, total_input_keys);
5293
+
5294
+ std::string output_directory =
5295
+ FLAGS_secondary_path + "/openandcompact_" + std::to_string(thread->tid);
5296
+
5297
+ // Always clean up in odd run, depending on
5298
+ // !FLAGS_openandcompact_allow_resumption in even run
5299
+ bool should_cleanup = is_odd_run || !FLAGS_openandcompact_allow_resumption;
5300
+
5301
+ if (should_cleanup) {
5302
+ std::vector<std::string> children;
5303
+ Status list_status = FLAGS_env->GetChildren(output_directory, &children);
5304
+ if (list_status.ok()) {
5305
+ for (const auto& child : children) {
5306
+ if (child != "." && child != "..") {
5307
+ std::string child_path = output_directory + "/" + child;
5308
+ Status del_status = FLAGS_env->DeleteFile(child_path);
5309
+ if (!del_status.ok()) {
5310
+ fprintf(stderr, "Warning: Failed to delete file %s: %s\n",
5311
+ child_path.c_str(), del_status.ToString().c_str());
5312
+ }
5313
+ }
5314
+ }
5315
+ Status del_dir_status = FLAGS_env->DeleteDir(output_directory);
5316
+ if (!del_dir_status.ok()) {
5317
+ fprintf(stderr, "Warning: Failed to delete directory %s: %s\n",
5318
+ output_directory.c_str(), del_dir_status.ToString().c_str());
5319
+ }
5320
+ }
5321
+ }
5322
+
5323
+ Status create_output_status =
5324
+ FLAGS_env->CreateDirIfMissing(output_directory);
5325
+ if (!create_output_status.ok()) {
5326
+ fprintf(stderr, "Failed to create output directory %s: %s\n",
5327
+ output_directory.c_str(),
5328
+ create_output_status.ToString().c_str());
5329
+ return;
5330
+ }
5331
+
5332
+ std::string result_string;
5333
+
5334
+ CompactionServiceOptionsOverride options_override;
5335
+ options_override.env = FLAGS_env;
5336
+ BlockBasedTableOptions table_options;
5337
+ options_override.table_factory.reset(
5338
+ NewBlockBasedTableFactory(table_options));
5339
+
5340
+ OpenAndCompactOptions options;
5341
+ std::atomic<bool> should_cancel{false};
5342
+ options.canceled = &should_cancel;
5343
+ options.allow_resumption = FLAGS_openandcompact_allow_resumption;
5344
+
5345
+ Status s;
5346
+ uint64_t start_time = FLAGS_env->NowMicros();
5347
+ uint64_t end_time = start_time;
5348
+
5349
+ if (FLAGS_openandcompact_test_cancel_on_odd && is_odd_run) {
5350
+ std::thread compaction_thread([&]() {
5351
+ s = DB::OpenAndCompact(options, FLAGS_db, output_directory,
5352
+ input_string, &result_string, options_override);
5353
+ end_time = FLAGS_env->NowMicros();
5354
+ });
5355
+
5356
+ std::thread cancellation_timer([&]() {
5357
+ std::this_thread::sleep_for(std::chrono::milliseconds(
5358
+ FLAGS_openandcompact_cancel_after_millseconds));
5359
+ should_cancel.store(true);
5360
+ });
5361
+
5362
+ compaction_thread.join();
5363
+ cancellation_timer.join();
5364
+ } else {
5365
+ // Normal synchronous operation for even runs or when test_cancel_on_odd
5366
+ // is false
5367
+ s = DB::OpenAndCompact(options, FLAGS_db, output_directory, input_string,
5368
+ &result_string, options_override);
5369
+ end_time = FLAGS_env->NowMicros();
5370
+ }
5371
+
5372
+ uint64_t latency_micros = end_time - start_time;
5373
+ double latency_seconds = latency_micros / 1000000.0;
5374
+
5375
+ fprintf(stdout,
5376
+ "OpenAndCompact() API call : %.3f micros/op %.3f seconds/op\n",
5377
+ (double)latency_micros, latency_seconds);
5378
+
5379
+ fprintf(stdout, "OpenAndCompact status: %s\n", s.ToString().c_str());
5380
+
5381
+ if (FLAGS_openandcompact_test_cancel_on_odd && is_odd_run) {
5382
+ if (!s.IsManualCompactionPaused()) {
5383
+ fprintf(stdout, "Fail to cancel compaction");
5384
+ }
5385
+ return;
5386
+ } else if (!s.ok()) {
5387
+ fprintf(stderr, "OpenAndCompact failed: %s\n", s.ToString().c_str());
5388
+ return;
5389
+ }
5390
+
5391
+ CompactionServiceResult compaction_result;
5392
+ Status parse_status =
5393
+ CompactionServiceResult::Read(result_string, &compaction_result);
5394
+ if (parse_status.ok()) {
5395
+ uint64_t total_output_size = 0;
5396
+ for (const auto& output_file : compaction_result.output_files) {
5397
+ total_output_size += output_file.file_size;
5398
+ }
5399
+
5400
+ uint64_t num_output_files = compaction_result.output_files.size();
5401
+ uint64_t avg_output_file_size =
5402
+ num_output_files > 0 ? total_output_size / num_output_files : 0;
5403
+
5404
+ fprintf(stdout,
5405
+ "Output: %" PRIu64 " files, average size: %" PRIu64
5406
+ " bytes (%.2f MB)\n",
5407
+ num_output_files, avg_output_file_size,
5408
+ avg_output_file_size / (1024.0 * 1024.0));
5409
+ } else {
5410
+ fprintf(stderr, "Failed to parse compaction result: %s\n",
5411
+ parse_status.ToString().c_str());
5412
+ }
5413
+ }
5414
+
5175
5415
  class KeyGenerator {
5176
5416
  public:
5177
5417
  KeyGenerator(Random64* rand, WriteMode mode, uint64_t num,