@nxtedition/rocksdb 14.0.0 → 15.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/binding.cc +52 -179
  2. package/deps/rocksdb/rocksdb/BUCK +7 -0
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +29 -14
  4. package/deps/rocksdb/rocksdb/Directory.Build.props +9 -0
  5. package/deps/rocksdb/rocksdb/Makefile +6 -1
  6. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +4 -4
  7. package/deps/rocksdb/rocksdb/ccache_msvc_compiler.bat +1 -0
  8. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +17 -3
  9. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -3
  10. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +10 -0
  11. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +522 -60
  12. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +69 -10
  13. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +443 -0
  14. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
  15. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +14 -3
  16. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +5 -5
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +3 -6
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +1 -1
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +28 -5
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +4 -4
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +6 -3
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +455 -98
  23. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +4 -2
  24. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +13 -1
  25. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +146 -0
  26. package/deps/rocksdb/rocksdb/db/db_follower_test.cc +2 -2
  27. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +6 -0
  28. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +5 -2
  29. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +18 -19
  30. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -0
  31. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +665 -14
  32. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +83 -0
  33. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +68 -0
  34. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +101 -0
  35. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +44 -0
  36. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +1 -2
  37. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +22 -5
  38. package/deps/rocksdb/rocksdb/db/log_reader.h +4 -4
  39. package/deps/rocksdb/rocksdb/db/log_writer.h +1 -1
  40. package/deps/rocksdb/rocksdb/db/merge_helper.h +1 -1
  41. package/deps/rocksdb/rocksdb/db/version_edit.cc +477 -139
  42. package/deps/rocksdb/rocksdb/db/version_edit.h +228 -8
  43. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +333 -0
  44. package/deps/rocksdb/rocksdb/db/write_thread.h +1 -1
  45. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
  46. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +247 -32
  47. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +3 -0
  48. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.cc +61 -0
  49. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.h +17 -28
  50. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +16 -0
  51. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +6 -1
  52. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +46 -18
  53. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +18 -1
  54. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +8 -7
  55. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +4 -4
  56. package/deps/rocksdb/rocksdb/env/fs_posix.cc +1 -0
  57. package/deps/rocksdb/rocksdb/file/filename.cc +40 -0
  58. package/deps/rocksdb/rocksdb/file/filename.h +14 -1
  59. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +4 -3
  60. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +2 -1
  61. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +26 -7
  62. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -3
  63. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +59 -0
  64. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
  65. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +24 -0
  66. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +2 -1
  67. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +4 -0
  68. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +13 -8
  69. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +1 -0
  70. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +2 -0
  71. package/deps/rocksdb/rocksdb/options/options_test.cc +5 -0
  72. package/deps/rocksdb/rocksdb/src.mk +2 -0
  73. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +73 -16
  74. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +10 -5
  75. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +32 -0
  76. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +18 -27
  77. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +0 -3
  78. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +5 -1
  79. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +708 -217
  80. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +11 -6
  81. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +5 -3
  82. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +27 -19
  83. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +24 -6
  84. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +51 -18
  85. package/deps/rocksdb/rocksdb/table/block_based/index_builder_test.cc +183 -0
  86. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +4 -2
  87. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.h +0 -2
  88. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +8 -3
  89. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +3 -1
  90. package/deps/rocksdb/rocksdb/table/table_test.cc +222 -36
  91. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +246 -6
  92. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +86 -0
  93. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +21 -0
  94. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -0
  95. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +1 -1
  96. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -0
  97. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +0 -2
  98. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +12 -12
  99. package/index.js +27 -37
  100. package/package.json +1 -1
  101. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  102. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
@@ -176,9 +176,20 @@ class CompactionJob {
176
176
  // and organizing seqno <-> time info. `known_single_subcompact` is non-null
177
177
  // if we already have a known single subcompaction, with optional key bounds
178
178
  // (currently for executing a remote compaction).
179
+ //
180
+ // @param compaction_progress Previously saved compaction progress
181
+ // to resume from. If empty, compaction starts fresh from the
182
+ // beginning.
183
+ //
184
+ // @param compaction_progress_writer Writer for persisting
185
+ // subcompaction progress periodically during compaction
186
+ // execution. If nullptr, progress tracking is disabled and compaction
187
+ // cannot be resumed later.
179
188
  void Prepare(
180
189
  std::optional<std::pair<std::optional<Slice>, std::optional<Slice>>>
181
- known_single_subcompact);
190
+ known_single_subcompact,
191
+ const CompactionProgress& compaction_progress = CompactionProgress{},
192
+ log::Writer* compaction_progress_writer = nullptr);
182
193
 
183
194
  // REQUIRED mutex not held
184
195
  // Launch threads for each subcompaction and wait for them to finish. After
@@ -197,6 +208,7 @@ class CompactionJob {
197
208
 
198
209
  protected:
199
210
  void UpdateCompactionJobOutputStatsFromInternalStats(
211
+ const Status& status,
200
212
  const InternalStats::CompactionStatsFull& internal_stats) const;
201
213
 
202
214
  void LogCompaction();
@@ -259,6 +271,10 @@ class CompactionJob {
259
271
  // consecutive groups such that each group has a similar size.
260
272
  void GenSubcompactionBoundaries();
261
273
 
274
+ void MaybeAssignCompactionProgressAndWriter(
275
+ const CompactionProgress& compaction_progress,
276
+ log::Writer* compaction_progress_writer);
277
+
262
278
  // Get the number of planned subcompactions based on max_subcompactions and
263
279
  // extra reserved resources
264
280
  uint64_t GetSubcompactionsLimit();
@@ -359,8 +375,9 @@ class CompactionJob {
359
375
  const CompactionFilter* configured_compaction_filter,
360
376
  const CompactionFilter*& compaction_filter,
361
377
  std::unique_ptr<CompactionFilter>& compaction_filter_from_factory);
362
- void InitializeReadOptions(ColumnFamilyData* cfd, ReadOptions& read_options,
363
- SubcompactionKeyBoundaries& boundaries);
378
+ void InitializeReadOptionsAndBoundaries(
379
+ size_t ts_sz, ReadOptions& read_options,
380
+ SubcompactionKeyBoundaries& boundaries);
364
381
  InternalIterator* CreateInputIterator(
365
382
  SubcompactionState* sub_compact, ColumnFamilyData* cfd,
366
383
  SubcompactionInternalIterators& iterators,
@@ -411,12 +428,12 @@ class CompactionJob {
411
428
  // update the thread status for starting a compaction.
412
429
  void ReportStartedCompaction(Compaction* compaction);
413
430
 
414
- Status FinishCompactionOutputFile(const Status& input_status,
415
- SubcompactionState* sub_compact,
416
- CompactionOutputs& outputs,
417
- const Slice& next_table_min_key,
418
- const Slice* comp_start_user_key,
419
- const Slice* comp_end_user_key);
431
+ Status FinishCompactionOutputFile(
432
+ const Status& input_status,
433
+ const ParsedInternalKey& prev_table_last_internal_key,
434
+ const Slice& next_table_min_key, const Slice* comp_start_user_key,
435
+ const Slice* comp_end_user_key, const CompactionIterator* c_iter,
436
+ SubcompactionState* sub_compact, CompactionOutputs& outputs);
420
437
  Status InstallCompactionResults(bool* compaction_released);
421
438
  Status OpenCompactionOutputFile(SubcompactionState* sub_compact,
422
439
  CompactionOutputs& outputs);
@@ -493,6 +510,9 @@ class CompactionJob {
493
510
  // Setting this requires DBMutex.
494
511
  uint64_t options_file_number_ = 0;
495
512
 
513
+ // Writer for persisting compaction progress during compaction
514
+ log::Writer* compaction_progress_writer_ = nullptr;
515
+
496
516
  // Get table file name in where it's outputting to, which should also be in
497
517
  // `output_directory_`.
498
518
  virtual std::string GetTableFileName(uint64_t file_number);
@@ -500,6 +520,43 @@ class CompactionJob {
500
520
  // The Compaction Read and Write priorities are the same for different
501
521
  // scenarios, such as write stalled.
502
522
  Env::IOPriority GetRateLimiterPriority();
523
+
524
+ Status MaybeResumeSubcompactionProgressOnInputIterator(
525
+ SubcompactionState* sub_compact, InternalIterator* input_iter);
526
+
527
+ Status ReadOutputFilesTableProperties(
528
+ const autovector<FileMetaData>& temporary_output_file_allocation,
529
+ const ReadOptions& read_options,
530
+ std::vector<std::shared_ptr<const TableProperties>>&
531
+ output_files_table_properties,
532
+ bool is_proximal_level = false);
533
+
534
+ Status ReadTablePropertiesDirectly(
535
+ const ImmutableOptions& ioptions, const MutableCFOptions& moptions,
536
+ const FileMetaData* file_meta, const ReadOptions& read_options,
537
+ std::shared_ptr<const TableProperties>* tp);
538
+
539
+ void RestoreCompactionOutputs(
540
+ const ColumnFamilyData* cfd,
541
+ const std::vector<std::shared_ptr<const TableProperties>>&
542
+ output_files_table_properties,
543
+ SubcompactionProgressPerLevel& subcompaction_progress_per_level,
544
+ CompactionOutputs* outputs_to_restore);
545
+
546
+ bool ShouldUpdateSubcompactionProgress(
547
+ const SubcompactionState* sub_compact, const CompactionIterator* c_iter,
548
+ const ParsedInternalKey& prev_table_last_internal_key,
549
+ const Slice& next_table_min_internal_key, const FileMetaData* meta) const;
550
+
551
+ void UpdateSubcompactionProgress(const CompactionIterator* c_iter,
552
+ const Slice next_table_min_key,
553
+ SubcompactionState* sub_compact);
554
+
555
+ Status PersistSubcompactionProgress(SubcompactionState* sub_compact);
556
+
557
+ void UpdateSubcompactionProgressPerLevel(
558
+ SubcompactionState* sub_compact, bool is_proximal_level,
559
+ SubcompactionProgress& subcompaction_progress);
503
560
  };
504
561
 
505
562
  // CompactionServiceInput is used to pass compaction information between two
@@ -649,7 +706,9 @@ class CompactionServiceCompactionJob : private CompactionJob {
649
706
 
650
707
  // REQUIRED: mutex held
651
708
  // Like CompactionJob::Prepare()
652
- void Prepare();
709
+ void Prepare(
710
+ const CompactionProgress& compaction_progress = CompactionProgress{},
711
+ log::Writer* compaction_progress_writer = nullptr);
653
712
 
654
713
  // Run the compaction in current thread and return the result
655
714
  Status Run();
@@ -17,6 +17,7 @@
17
17
  #include "db/db_impl/db_impl.h"
18
18
  #include "db/error_handler.h"
19
19
  #include "db/version_set.h"
20
+ #include "file/filename.h"
20
21
  #include "file/random_access_file_reader.h"
21
22
  #include "file/writable_file_writer.h"
22
23
  #include "options/options_helper.h"
@@ -2409,6 +2410,448 @@ TEST_F(CompactionJobIOPriorityTest, GetRateLimiterPriority) {
2409
2410
  Env::IO_LOW, Env::IO_LOW);
2410
2411
  }
2411
2412
 
2413
+ class ResumableCompactionJobTest : public CompactionJobTestBase {
2414
+ public:
2415
+ ResumableCompactionJobTest()
2416
+ : CompactionJobTestBase(
2417
+ test::PerThreadDBPath("allow_resumption_job_test"),
2418
+ BytewiseComparator(), [](uint64_t /*ts*/) { return ""; },
2419
+ /*test_io_priority=*/false, TableTypeForTest::kBlockBasedTable) {}
2420
+
2421
+ protected:
2422
+ static constexpr const char* kCancelBeforeThisKey = "cancel_before_this_key";
2423
+ std::string progress_dir_ = "";
2424
+ bool enable_cancel_ = false;
2425
+ std::atomic<int> stop_count_{0};
2426
+ std::atomic<bool> cancel_{false};
2427
+
2428
+ void SetUp() override {
2429
+ CompactionJobTestBase::SetUp();
2430
+ SyncPoint::GetInstance()->SetCallBack(
2431
+ "CompactionOutputs::ShouldStopBefore::manual_decision",
2432
+ [this](void* p) {
2433
+ auto* pair = static_cast<std::pair<bool*, const Slice>*>(p);
2434
+ *(pair->first) = true;
2435
+
2436
+ // Cancel after outputting a specific key
2437
+ if (enable_cancel_) {
2438
+ ParsedInternalKey parsed_key;
2439
+ if (ParseInternalKey(pair->second, &parsed_key, true).ok()) {
2440
+ if (parsed_key.user_key == kCancelBeforeThisKey) {
2441
+ cancel_.store(true);
2442
+ }
2443
+ }
2444
+ }
2445
+ });
2446
+ SyncPoint::GetInstance()->EnableProcessing();
2447
+ }
2448
+
2449
+ void TearDown() override {
2450
+ SyncPoint::GetInstance()->DisableProcessing();
2451
+ SyncPoint::GetInstance()->ClearAllCallBacks();
2452
+
2453
+ if (env_->FileExists(progress_dir_).ok()) {
2454
+ std::vector<std::string> files;
2455
+ EXPECT_OK(env_->GetChildren(progress_dir_, &files));
2456
+ for (const auto& file : files) {
2457
+ if (file != "." && file != "..") {
2458
+ EXPECT_OK(env_->DeleteFile(progress_dir_ + "/" + file));
2459
+ }
2460
+ }
2461
+ EXPECT_OK(env_->DeleteDir(progress_dir_));
2462
+ }
2463
+
2464
+ CompactionJobTestBase::TearDown();
2465
+ }
2466
+
2467
+ void NewDB() {
2468
+ if (env_->FileExists(progress_dir_).ok()) {
2469
+ std::vector<std::string> files;
2470
+ EXPECT_OK(env_->GetChildren(progress_dir_, &files));
2471
+ for (const auto& file : files) {
2472
+ if (file != "." && file != "..") {
2473
+ EXPECT_OK(env_->DeleteFile(progress_dir_ + "/" + file));
2474
+ }
2475
+ }
2476
+ EXPECT_OK(env_->DeleteDir(progress_dir_));
2477
+ }
2478
+
2479
+ CompactionJobTestBase::NewDB();
2480
+
2481
+ progress_dir_ = test::PerThreadDBPath("compaction_progress");
2482
+ ASSERT_OK(env_->CreateDirIfMissing(progress_dir_));
2483
+ }
2484
+
2485
// Enables the cancellation check in the sync-point callback registered by
// SetUp(): while enable_cancel_ is true, that callback sets cancel_ when the
// key it receives has user key kCancelBeforeThisKey.
+ void EnableCompactionCancel() { enable_cancel_ = true; }
2486
+
2487
// Turns the cancellation check off again and clears cancel_. cancel_ is the
// flag handed to CompactionJob in RunCompactionWithProgressTracking(), so
// resetting it presumably keeps a later run from starting out cancelled.
+ void DisableCompactionCancel() {
2488
+ enable_cancel_ = false;
2489
+ cancel_.store(false);
2490
+ }
2491
+
2492
// Builds a log::Writer that writes to `compaction_progress_file`.
//
// Opens the path through fs_->NewWritableFile() with default FileOptions,
// wraps the resulting file in a WritableFileWriter, and hands that to a new
// log::Writer. The open is only checked with EXPECT_OK, so a failure is
// reported but the function still goes on to construct and return a writer.
//
// NOTE(review): the trailing `0, false` arguments are presumably the log
// number and the recycle-log-files flag -- confirm against log::Writer.
//
// @param compaction_progress_file Path of the progress file to create.
// @return Owning pointer to the writer; the caller keeps it alive for as long
//         as the raw pointer passed on to the compaction job is in use.
+ std::unique_ptr<log::Writer> CreateCompactionProgressWriter(
2493
+ const std::string& compaction_progress_file) {
2494
+ std::unique_ptr<FSWritableFile> file;
2495
+ EXPECT_OK(fs_->NewWritableFile(compaction_progress_file, FileOptions(),
2496
+ &file, nullptr));
2497
+ auto file_writer = std::make_unique<WritableFileWriter>(
2498
+ std::move(file), compaction_progress_file, FileOptions());
2499
+ auto compaction_progress_writer =
2500
+ std::make_unique<log::Writer>(std::move(file_writer), 0, false);
2501
+ return compaction_progress_writer;
2502
+ }
2503
+
2504
+ Status RunCompactionWithProgressTracking(
2505
+ const CompactionProgress& compaction_progress,
2506
+ log::Writer* compaction_progress_writer,
2507
+ std::vector<SequenceNumber> snapshots = {},
2508
+ std::shared_ptr<Statistics> stats = nullptr) {
2509
+ mutex_.Lock();
2510
+
2511
+ auto cfd = versions_->GetColumnFamilySet()->GetDefault();
2512
+ auto files = cfd->current()->storage_info()->LevelFiles(0);
2513
+
2514
+ db_options_.statistics = stats;
2515
+ db_options_.stats = db_options_.statistics.get();
2516
+
2517
+ std::vector<CompactionInputFiles> compaction_input_files;
2518
+ CompactionInputFiles level;
2519
+ level.level = 0;
2520
+ level.files = files;
2521
+ compaction_input_files.push_back(level);
2522
+
2523
+ Compaction compaction(
2524
+ cfd->current()->storage_info(), cfd->ioptions(),
2525
+ cfd->GetLatestMutableCFOptions(), mutable_db_options_,
2526
+ compaction_input_files, 1, mutable_cf_options_.target_file_size_base,
2527
+ mutable_cf_options_.max_compaction_bytes, 0, kNoCompression,
2528
+ cfd->GetLatestMutableCFOptions().compression_opts,
2529
+ Temperature::kUnknown, 0, {}, std::nullopt, nullptr,
2530
+ CompactionReason::kManualCompaction);
2531
+ compaction.FinalizeInputInfo(cfd->current());
2532
+
2533
+ LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, db_options_.info_log.get());
2534
+ EventLogger event_logger(db_options_.info_log.get());
2535
+ JobContext job_context(1, false);
2536
+ job_context.InitSnapshotContext(nullptr, nullptr, kMaxSequenceNumber,
2537
+ std::move(snapshots));
2538
+ CompactionJobStats job_stats;
2539
+
2540
+ CompactionJob compaction_job(
2541
+ 0, &compaction, db_options_, mutable_db_options_, env_options_,
2542
+ versions_.get(), &shutting_down_, &log_buffer, nullptr, nullptr,
2543
+ nullptr, stats.get(), &mutex_, &error_handler_, &job_context,
2544
+ table_cache_, &event_logger, false, false, dbname_, &job_stats,
2545
+ Env::Priority::USER, nullptr, cancel_, env_->GenerateUniqueId(),
2546
+ DBImpl::GenerateDbSessionId(nullptr), "");
2547
+
2548
+ compaction_job.Prepare(std::nullopt, compaction_progress,
2549
+ compaction_progress_writer);
2550
+ mutex_.Unlock();
2551
+
2552
+ compaction_job.Run().PermitUncheckedError();
2553
+ EXPECT_OK(compaction_job.io_status());
2554
+
2555
+ mutex_.Lock();
2556
+
2557
+ bool compaction_released = false;
2558
+ Status s = compaction_job.Install(&compaction_released);
2559
+
2560
+ mutex_.Unlock();
2561
+ if (!compaction_released) {
2562
+ compaction.ReleaseCompactionFiles(s);
2563
+ }
2564
+
2565
+ return s;
2566
+ }
2567
+
2568
+ SubcompactionProgress ReadAndParseProgress(
2569
+ const std::string& compaction_progress_file) {
2570
+ std::unique_ptr<FSSequentialFile> seq_file;
2571
+ EXPECT_OK(fs_->NewSequentialFile(compaction_progress_file, FileOptions(),
2572
+ &seq_file, nullptr));
2573
+ auto file_reader = std::make_unique<SequentialFileReader>(
2574
+ std::move(seq_file), compaction_progress_file, 0, nullptr);
2575
+ log::Reader reader(nullptr, std::move(file_reader), nullptr, true, 0);
2576
+
2577
+ SubcompactionProgressBuilder builder;
2578
+ std::string record;
2579
+ Slice slice;
2580
+
2581
+ while (reader.ReadRecord(&slice, &record)) {
2582
+ VersionEdit edit;
2583
+ if (!edit.DecodeFrom(slice).ok()) continue;
2584
+ builder.ProcessVersionEdit(edit);
2585
+ }
2586
+
2587
+ EXPECT_TRUE(builder.HasAccumulatedSubcompactionProgress());
2588
+
2589
+ return builder.GetAccumulatedSubcompactionProgress();
2590
+ }
2591
+
2592
+ // Test utility function to verify that compaction progress was correctly
2593
+ // persisted to the progress file after compaction interruption.
2594
+ //
2595
+ // VERIFIES:
2596
+ // - Progress file exists and has expected size (empty if no progress
2597
+ // expected)
2598
+ // - Next internal key to compact matches expected user key with proper format
2599
+ // - Number of processed input records matches position in ordered input keys
2600
+ // - Number of processed output records equals number of processed input
2601
+ // records (by test design to simplify verification)
2602
+ // - Each output file contains exactly one user key (by test design to
2603
+ // simplify verification)
2604
+ void VerifyCompactionProgressPersisted(
2605
+ const std::string& compaction_progress_file,
2606
+ const std::string& next_user_key_to_compact,
2607
+ const std::vector<std::string>& ordered_intput_keys) {
2608
+ ASSERT_OK(env_->FileExists(compaction_progress_file));
2609
+
2610
+ uint64_t file_size;
2611
+ ASSERT_OK(env_->GetFileSize(compaction_progress_file, &file_size));
2612
+
2613
+ if (next_user_key_to_compact.empty()) {
2614
+ ASSERT_EQ(file_size, 0);
2615
+ return;
2616
+ }
2617
+
2618
+ const auto& subcompaction_progress =
2619
+ ReadAndParseProgress(compaction_progress_file);
2620
+
2621
+ ASSERT_FALSE(subcompaction_progress.next_internal_key_to_compact.empty());
2622
+ ParsedInternalKey parsed_next_key;
2623
+ ASSERT_OK(
2624
+ ParseInternalKey(subcompaction_progress.next_internal_key_to_compact,
2625
+ &parsed_next_key, true /* log_err_key */));
2626
+ ASSERT_EQ(parsed_next_key.user_key, next_user_key_to_compact);
2627
+ ASSERT_EQ(parsed_next_key.sequence, kMaxSequenceNumber);
2628
+ ASSERT_EQ(parsed_next_key.type, kValueTypeForSeek);
2629
+
2630
+ auto it = std::find(ordered_intput_keys.begin(), ordered_intput_keys.end(),
2631
+ next_user_key_to_compact);
2632
+ ASSERT_TRUE(it != ordered_intput_keys.end());
2633
+
2634
+ auto next_key_index = std::distance(ordered_intput_keys.begin(), it);
2635
+
2636
+ ASSERT_EQ(subcompaction_progress.num_processed_input_records,
2637
+ next_key_index);
2638
+
2639
+ ASSERT_EQ(subcompaction_progress.output_level_progress
2640
+ .GetNumProcessedOutputRecords(),
2641
+ next_key_index);
2642
+
2643
+ ASSERT_EQ(
2644
+ subcompaction_progress.output_level_progress.GetOutputFiles().size(),
2645
+
2646
+ next_key_index);
2647
+
2648
+ for (size_t i = 0;
2649
+ i <
2650
+ subcompaction_progress.output_level_progress.GetOutputFiles().size();
2651
+ ++i) {
2652
+ const auto& output_file =
2653
+ subcompaction_progress.output_level_progress.GetOutputFiles()[i];
2654
+ ASSERT_EQ(output_file.smallest.user_key().ToString(),
2655
+ output_file.largest.user_key().ToString());
2656
+ ASSERT_EQ(output_file.largest.user_key().ToString(),
2657
+ ordered_intput_keys[i]);
2658
+ }
2659
+ }
2660
+
2661
+ void RunCancelAndResumeTest(
2662
+ const std::initializer_list<mock::KVPair>& input_file_1,
2663
+ const std::initializer_list<mock::KVPair>& input_file_2,
2664
+ uint64_t last_sequence, const std::vector<uint64_t>& snapshots,
2665
+ const std::string& expected_next_key_to_compact,
2666
+ const std::vector<std::string>& expected_input_keys, bool exists_progress,
2667
+ bool cancelled_past_mid_point = false) {
2668
+ std::shared_ptr<Statistics> stats = ROCKSDB_NAMESPACE::CreateDBStatistics();
2669
+
2670
+ auto file1 = mock::MakeMockFile(input_file_1);
2671
+ AddMockFile(file1);
2672
+ auto file2 = mock::MakeMockFile(input_file_2);
2673
+ AddMockFile(file2);
2674
+ SetLastSequence(last_sequence);
2675
+
2676
+ // First compaction (will be cancelled)
2677
+ std::string compaction_progress_file =
2678
+ CompactionProgressFileName(progress_dir_, 123);
2679
+ std::unique_ptr<log::Writer> compaction_progress_writer =
2680
+ CreateCompactionProgressWriter(compaction_progress_file);
2681
+
2682
+ ASSERT_OK(stats->Reset());
2683
+ EnableCompactionCancel();
2684
+
2685
+ Status status = RunCompactionWithProgressTracking(
2686
+ CompactionProgress{}, compaction_progress_writer.get(), snapshots,
2687
+ stats);
2688
+
2689
+ ASSERT_TRUE(status.IsManualCompactionPaused());
2690
+ DisableCompactionCancel();
2691
+
2692
+ HistogramData cancelled_compaction_stats;
2693
+ stats->histogramData(FILE_WRITE_COMPACTION_MICROS,
2694
+ &cancelled_compaction_stats);
2695
+
2696
+ VerifyCompactionProgressPersisted(compaction_progress_file,
2697
+ expected_next_key_to_compact,
2698
+ expected_input_keys);
2699
+
2700
+ // Resume compaction
2701
+ CompactionProgress compaction_progress;
2702
+ if (exists_progress) {
2703
+ compaction_progress.push_back(
2704
+ ReadAndParseProgress(compaction_progress_file));
2705
+ }
2706
+
2707
+ std::string compaction_progress_file_2 =
2708
+ CompactionProgressFileName(progress_dir_, 234);
2709
+ std::unique_ptr<log::Writer> compaction_progress_writer_2 =
2710
+ CreateCompactionProgressWriter(compaction_progress_file_2);
2711
+
2712
+ ASSERT_OK(stats->Reset());
2713
+
2714
+ status = RunCompactionWithProgressTracking(
2715
+ compaction_progress, compaction_progress_writer_2.get(),
2716
+ {} /* snapshots */, stats);
2717
+
2718
+ ASSERT_OK(status);
2719
+
2720
+ if (cancelled_past_mid_point) {
2721
+ HistogramData resumed_compaction_stats;
2722
+ stats->histogramData(FILE_WRITE_COMPACTION_MICROS,
2723
+ &resumed_compaction_stats);
2724
+ ASSERT_GT(cancelled_compaction_stats.count,
2725
+ resumed_compaction_stats.count);
2726
+ }
2727
+ }
2728
+ };
2729
+
2730
+ TEST_F(ResumableCompactionJobTest, BasicProgressPersistence) {
2731
+ NewDB();
2732
+
2733
+ auto file1 = mock::MakeMockFile({
2734
+ {KeyStr("a", 1U, kTypeValue), "val1"},
2735
+ {KeyStr("b", 2U, kTypeValue), "val2"},
2736
+ });
2737
+ AddMockFile(file1);
2738
+
2739
+ auto file2 = mock::MakeMockFile({
2740
+ {KeyStr("c", 3U, kTypeValue), "val3"},
2741
+ {KeyStr("d", 4U, kTypeValue), "val4"},
2742
+ });
2743
+ AddMockFile(file2);
2744
+
2745
+ SetLastSequence(4U);
2746
+
2747
+ std::string compaction_progress_file =
2748
+ CompactionProgressFileName(progress_dir_, 123);
2749
+
2750
+ std::unique_ptr<log::Writer> compaction_progress_writer =
2751
+ CreateCompactionProgressWriter(compaction_progress_file);
2752
+
2753
+ Status status = RunCompactionWithProgressTracking(
2754
+ CompactionProgress(), compaction_progress_writer.get());
2755
+
2756
+ ASSERT_OK(status);
2757
+
2758
+ VerifyCompactionProgressPersisted(
2759
+ compaction_progress_file, "d" /* next_user_key_to_compact */,
2760
+ {"a", "b", "c", "d"} /* ordered_intput_keys */);
2761
+ }
2762
+
2763
+ TEST_F(ResumableCompactionJobTest, BasicProgressResume) {
2764
+ NewDB();
2765
+
2766
+ RunCancelAndResumeTest(
2767
+ {{KeyStr("a", 1U, kTypeValue), "val1"},
2768
+ {KeyStr("b", 2U, kTypeValue), "val2"}} /* input_file_1 */,
2769
+ {{KeyStr("bb", 3U, kTypeValue), "val3"},
2770
+ {KeyStr(kCancelBeforeThisKey, 4U, kTypeValue),
2771
+ "val4"}} /* input_file_2 */,
2772
+ 4U /* last_sequence */, {} /* snapshots */,
2773
+ kCancelBeforeThisKey /* expected_next_key_to_compact */,
2774
+ {"a", "b", "bb", kCancelBeforeThisKey} /* expected_input_keys */,
2775
+ true /* exists_progress */, true /* cancelled_past_mid_point*/);
2776
+ }
2777
+
2778
+ TEST_F(ResumableCompactionJobTest, NoProgressResumeOnSameKey) {
2779
+ NewDB();
2780
+
2781
+ RunCancelAndResumeTest(
2782
+ {{KeyStr(kCancelBeforeThisKey, 1U, kTypeValue),
2783
+ "val1"}} /* input_file_1 */,
2784
+ {{KeyStr(kCancelBeforeThisKey, 2U, kTypeValue),
2785
+ "val2"}} /* input_file_2 */,
2786
+ 2U /* last_sequence */, {1U} /* snapshots */,
2787
+ "" /* expected_next_key_to_compact */,
2788
+ {kCancelBeforeThisKey, kCancelBeforeThisKey} /* expected_input_keys */,
2789
+ false /* exists_progress */);
2790
+ }
2791
+
2792
+ TEST_F(ResumableCompactionJobTest, NoProgressResumeOnDeleteRange) {
2793
+ NewDB();
2794
+
2795
+ RunCancelAndResumeTest(
2796
+ {{KeyStr(kCancelBeforeThisKey, 1U, kTypeValue),
2797
+ "val1"}} /* input_file_1 */,
2798
+ {{KeyStr(kCancelBeforeThisKey, 2U, kTypeRangeDeletion),
2799
+ "val2"}} /* input_file_2 */,
2800
+ 2U /* last_sequence */, {1U} /* snapshots */,
2801
+ "" /* expected_next_key_to_compact */,
2802
+ {kCancelBeforeThisKey, kCancelBeforeThisKey} /* expected_input_keys */,
2803
+ false /* exists_progress */);
2804
+ }
2805
+
2806
+ TEST_F(ResumableCompactionJobTest, NoProgressResumeOnMerge) {
2807
+ merge_op_ = MergeOperators::CreateStringAppendOperator();
2808
+ NewDB();
2809
+
2810
+ RunCancelAndResumeTest(
2811
+ {{KeyStr("a", 1U, kTypeValue), "val1"},
2812
+ {KeyStr("b", 2U, kTypeValue), "val2"}} /* input_file_1 */,
2813
+ {{KeyStr("bb", 3U, kTypeValue), "val3"},
2814
+ {KeyStr(kCancelBeforeThisKey, 4U, kTypeMerge),
2815
+ "val4"}} /* input_file_2 */,
2816
+ 4U /* last_sequence */, {} /* snapshots */,
2817
+ "bb" /* expected_next_key_to_compact */,
2818
+ {"a", "b", "bb", kCancelBeforeThisKey} /* expected_input_keys */,
2819
+ true /* exists_progress */);
2820
+ }
2821
+
2822
+ TEST_F(ResumableCompactionJobTest, NoProgressResumeOnSingleDelete) {
2823
+ NewDB();
2824
+
2825
+ RunCancelAndResumeTest(
2826
+ {{KeyStr("a", 1U, kTypeValue), "val1"},
2827
+ {KeyStr("b", 2U, kTypeValue), "val2"},
2828
+ {KeyStr(kCancelBeforeThisKey, 3U, kTypeValue),
2829
+ "val3"}} /* input_file_1 */,
2830
+ {{KeyStr(kCancelBeforeThisKey, 4U, kTypeSingleDeletion), ""},
2831
+ {KeyStr("d", 5U, kTypeValue), "val4"}} /* input_file_2 */,
2832
+ 5U /* last_sequence */, {3U} /* snapshots */,
2833
+ "b" /* expected_next_key_to_compact */,
2834
+ {"a", "b", kCancelBeforeThisKey, kCancelBeforeThisKey,
2835
+ "d"} /* expected_input_keys */,
2836
+ true /* exists_progress */);
2837
+ }
2838
+
2839
+ TEST_F(ResumableCompactionJobTest, NoProgressResumeOnDeletionAtBottom) {
2840
+ NewDB();
2841
+
2842
+ RunCancelAndResumeTest(
2843
+ {{KeyStr("a", 1U, kTypeValue), "val1"},
2844
+ {KeyStr("b", 2U, kTypeValue), "val2"},
2845
+ {KeyStr(kCancelBeforeThisKey, 3U, kTypeValue),
2846
+ "val3"}} /* input_file_1 */,
2847
+ {{KeyStr(kCancelBeforeThisKey, 4U, kTypeDeletion), ""},
2848
+ {KeyStr("d", 5U, kTypeValue), "val4"}} /* input_file_2 */,
2849
+ 5U /* last_sequence */, {3U} /* snapshots */,
2850
+ "b" /* expected_next_key_to_compact */,
2851
+ {"a", "b", kCancelBeforeThisKey, kCancelBeforeThisKey,
2852
+ "d"} /* expected_input_keys */,
2853
+ true /* exists_progress */);
2854
+ }
2412
2855
  } // namespace ROCKSDB_NAMESPACE
2413
2856
 
2414
2857
  int main(int argc, char** argv) {
@@ -359,7 +359,8 @@ bool CompactionOutputs::ShouldStopBefore(const CompactionIterator& c_iter) {
359
359
  Status CompactionOutputs::AddToOutput(
360
360
  const CompactionIterator& c_iter,
361
361
  const CompactionFileOpenFunc& open_file_func,
362
- const CompactionFileCloseFunc& close_file_func) {
362
+ const CompactionFileCloseFunc& close_file_func,
363
+ const ParsedInternalKey& prev_table_last_internal_key) {
363
364
  Status s;
364
365
  bool is_range_del = c_iter.IsDeleteRangeSentinelKey();
365
366
  if (is_range_del && compaction_->bottommost_level()) {
@@ -370,7 +371,8 @@ Status CompactionOutputs::AddToOutput(
370
371
  }
371
372
  const Slice& key = c_iter.key();
372
373
  if (ShouldStopBefore(c_iter) && HasBuilder()) {
373
- s = close_file_func(*this, c_iter.InputStatus(), key);
374
+ s = close_file_func(c_iter.InputStatus(), prev_table_last_internal_key, key,
375
+ &c_iter, *this);
374
376
  if (!s.ok()) {
375
377
  return s;
376
378
  }
@@ -21,7 +21,8 @@ namespace ROCKSDB_NAMESPACE {
21
21
  class CompactionOutputs;
22
22
  using CompactionFileOpenFunc = std::function<Status(CompactionOutputs&)>;
23
23
  using CompactionFileCloseFunc =
24
- std::function<Status(CompactionOutputs&, const Status&, const Slice&)>;
24
+ std::function<Status(const Status&, const ParsedInternalKey&, const Slice&,
25
+ const CompactionIterator*, CompactionOutputs&)>;
25
26
 
26
27
  // Files produced by subcompaction, most of the functions are used by
27
28
  // compaction_job Open/Close compaction file functions.
@@ -58,6 +59,8 @@ class CompactionOutputs {
58
59
  precalculated_hash, is_proximal_level_);
59
60
  }
60
61
 
62
+ const std::vector<Output>& GetOutputs() const { return outputs_; }
63
+
61
64
  // Set new table builder for the current output
62
65
  void NewBuilder(const TableBuilderOptions& tboptions);
63
66
 
@@ -195,6 +198,10 @@ class CompactionOutputs {
195
198
  std::pair<SequenceNumber, SequenceNumber> keep_seqno_range,
196
199
  const Slice& next_table_min_key, const std::string& full_history_ts_low);
197
200
 
201
+ void SetNumOutputRecords(uint64_t num_output_records) {
202
+ stats_.num_output_records = num_output_records;
203
+ }
204
+
198
205
  private:
199
206
  friend class SubcompactionState;
200
207
 
@@ -254,7 +261,8 @@ class CompactionOutputs {
254
261
  // close and open new compaction output with the functions provided.
255
262
  Status AddToOutput(const CompactionIterator& c_iter,
256
263
  const CompactionFileOpenFunc& open_file_func,
257
- const CompactionFileCloseFunc& close_file_func);
264
+ const CompactionFileCloseFunc& close_file_func,
265
+ const ParsedInternalKey& prev_table_last_internal_key);
258
266
 
259
267
  // Close the current output. `open_file_func` is needed for creating new file
260
268
  // for range-dels only output file.
@@ -270,9 +278,12 @@ class CompactionOutputs {
270
278
  !range_del_agg->IsEmpty()) {
271
279
  status = open_file_func(*this);
272
280
  }
281
+
273
282
  if (HasBuilder()) {
283
+ const ParsedInternalKey empty_internal_key{};
274
284
  const Slice empty_key{};
275
- Status s = close_file_func(*this, status, empty_key);
285
+ Status s = close_file_func(status, empty_internal_key, empty_key,
286
+ nullptr /* c_iter */, *this);
276
287
  if (!s.ok() && status.ok()) {
277
288
  status = s;
278
289
  }