@nxtedition/rocksdb 12.1.3 → 12.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133):
  1. package/binding.cc +12 -13
  2. package/binding.gyp +0 -4
  3. package/deps/rocksdb/rocksdb/Makefile +10 -5
  4. package/deps/rocksdb/rocksdb/TARGETS +9 -7
  5. package/deps/rocksdb/rocksdb/cache/cache.cc +15 -11
  6. package/deps/rocksdb/rocksdb/cache/cache_test.cc +26 -0
  7. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +16 -0
  8. package/deps/rocksdb/rocksdb/cache/clock_cache.h +6 -0
  9. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +38 -8
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +2 -0
  11. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +4 -0
  12. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +11 -0
  13. package/deps/rocksdb/rocksdb/cache/lru_cache.h +6 -0
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +2 -1
  15. package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache_test.cc +56 -0
  16. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +12 -9
  17. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +10 -0
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +9 -0
  19. package/deps/rocksdb/rocksdb/db/c.cc +9 -0
  20. package/deps/rocksdb/rocksdb/db/c_test.c +12 -1
  21. package/deps/rocksdb/rocksdb/db/column_family.cc +6 -23
  22. package/deps/rocksdb/rocksdb/db/column_family.h +1 -2
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +4 -5
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +4 -4
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +14 -6
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +19 -16
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +34 -30
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +2 -1
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +2 -1
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +1 -1
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +16 -31
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +2 -1
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +7 -50
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +95 -84
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +616 -5
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_state.cc +1 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +1 -1
  38. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +1 -1
  39. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +8 -2
  40. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +93 -69
  41. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +353 -89
  42. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +4 -3
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +116 -14
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +67 -8
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +42 -14
  46. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +50 -0
  47. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +1 -1
  48. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +79 -32
  49. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +36 -59
  50. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +72 -39
  51. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +14 -12
  52. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +75 -0
  53. package/deps/rocksdb/rocksdb/db/db_iter.cc +7 -3
  54. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +1 -1
  55. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +24 -0
  56. package/deps/rocksdb/rocksdb/db/db_test2.cc +36 -22
  57. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +23 -0
  58. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +2 -0
  59. package/deps/rocksdb/rocksdb/db/error_handler.cc +28 -3
  60. package/deps/rocksdb/rocksdb/db/error_handler.h +2 -1
  61. package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
  62. package/deps/rocksdb/rocksdb/db/experimental.cc +165 -33
  63. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +13 -5
  64. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +37 -28
  65. package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -6
  66. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +7 -6
  67. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +14 -6
  68. package/deps/rocksdb/rocksdb/db/job_context.h +4 -0
  69. package/deps/rocksdb/rocksdb/db/memtable.cc +24 -14
  70. package/deps/rocksdb/rocksdb/db/memtable.h +2 -1
  71. package/deps/rocksdb/rocksdb/db/memtable_list.cc +61 -33
  72. package/deps/rocksdb/rocksdb/db/memtable_list.h +8 -0
  73. package/deps/rocksdb/rocksdb/db/repair.cc +4 -2
  74. package/deps/rocksdb/rocksdb/db/table_cache.cc +2 -0
  75. package/deps/rocksdb/rocksdb/db/version_builder.cc +14 -11
  76. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +20 -4
  77. package/deps/rocksdb/rocksdb/db/version_set.cc +40 -30
  78. package/deps/rocksdb/rocksdb/db/version_set.h +13 -3
  79. package/deps/rocksdb/rocksdb/db/version_set_test.cc +8 -76
  80. package/deps/rocksdb/rocksdb/db/write_batch.cc +6 -2
  81. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +1 -1
  82. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -0
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +5 -1
  84. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +2 -1
  85. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +25 -2
  86. package/deps/rocksdb/rocksdb/env/fs_remap.cc +11 -0
  87. package/deps/rocksdb/rocksdb/env/fs_remap.h +5 -0
  88. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +11 -1
  89. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +3 -1
  90. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +20 -1
  91. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +10 -8
  92. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +4 -0
  93. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +30 -28
  94. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +10 -5
  95. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +3 -1
  96. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +287 -83
  97. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +68 -36
  98. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +8 -0
  99. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +1 -0
  100. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  101. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +4 -4
  102. package/deps/rocksdb/rocksdb/options/customizable_test.cc +31 -0
  103. package/deps/rocksdb/rocksdb/options/db_options.cc +14 -0
  104. package/deps/rocksdb/rocksdb/options/db_options.h +2 -0
  105. package/deps/rocksdb/rocksdb/options/options_helper.cc +15 -4
  106. package/deps/rocksdb/rocksdb/options/options_helper.h +4 -0
  107. package/deps/rocksdb/rocksdb/options/options_parser.cc +5 -4
  108. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +11 -1
  109. package/deps/rocksdb/rocksdb/options/options_test.cc +38 -45
  110. package/deps/rocksdb/rocksdb/port/port.h +16 -0
  111. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +8 -1
  112. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +10 -20
  113. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +15 -9
  114. package/deps/rocksdb/rocksdb/table/format.cc +32 -4
  115. package/deps/rocksdb/rocksdb/table/format.h +12 -1
  116. package/deps/rocksdb/rocksdb/table/iterator.cc +4 -0
  117. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +214 -161
  118. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +4 -2
  119. package/deps/rocksdb/rocksdb/table/table_properties.cc +4 -0
  120. package/deps/rocksdb/rocksdb/table/table_reader.h +2 -2
  121. package/deps/rocksdb/rocksdb/table/table_test.cc +5 -4
  122. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -0
  123. package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -0
  124. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +11 -2
  125. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +213 -22
  126. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +3 -0
  127. package/deps/rocksdb/rocksdb/util/async_file_reader.h +1 -1
  128. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +3 -0
  129. package/deps/rocksdb/rocksdb/util/coro_utils.h +2 -2
  130. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +3 -3
  131. package/package.json +1 -1
  132. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  133. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
@@ -4075,6 +4075,15 @@ void rocksdb_options_set_write_dbid_to_manifest(
4075
4075
  opt->rep.write_dbid_to_manifest = write_dbid_to_manifest;
4076
4076
  }
4077
4077
 
4078
+ unsigned char rocksdb_options_get_write_identity_file(rocksdb_options_t* opt) {
4079
+ return opt->rep.write_identity_file;
4080
+ }
4081
+
4082
+ void rocksdb_options_set_write_identity_file(
4083
+ rocksdb_options_t* opt, unsigned char write_identity_file) {
4084
+ opt->rep.write_identity_file = write_identity_file;
4085
+ }
4086
+
4078
4087
  unsigned char rocksdb_options_get_track_and_verify_wals_in_manifest(
4079
4088
  rocksdb_options_t* opt) {
4080
4089
  return opt->rep.track_and_verify_wals_in_manifest;
@@ -772,6 +772,8 @@ int main(int argc, char** argv) {
772
772
  rocksdb_options_set_write_buffer_size(options, 100000);
773
773
  rocksdb_options_set_paranoid_checks(options, 1);
774
774
  rocksdb_options_set_max_open_files(options, 10);
775
+ /* Compatibility with how test was written */
776
+ rocksdb_options_set_write_dbid_to_manifest(options, 0);
775
777
 
776
778
  table_options = rocksdb_block_based_options_create();
777
779
  rocksdb_block_based_options_set_block_cache(table_options, cache);
@@ -962,15 +964,24 @@ int main(int argc, char** argv) {
962
964
  rocksdb_options_t* options_dbid_in_manifest = rocksdb_options_create();
963
965
  rocksdb_options_set_create_if_missing(options_dbid_in_manifest, 1);
964
966
 
967
+ rocksdb_options_set_write_dbid_to_manifest(options_dbid_in_manifest, false);
965
968
  unsigned char write_to_manifest =
966
969
  rocksdb_options_get_write_dbid_to_manifest(options_dbid_in_manifest);
967
970
  CheckCondition(!write_to_manifest);
968
971
  rocksdb_options_set_write_dbid_to_manifest(options_dbid_in_manifest, true);
969
- CheckCondition(!write_to_manifest);
970
972
  write_to_manifest =
971
973
  rocksdb_options_get_write_dbid_to_manifest(options_dbid_in_manifest);
972
974
  CheckCondition(write_to_manifest);
973
975
 
976
+ rocksdb_options_set_write_identity_file(options_dbid_in_manifest, true);
977
+ unsigned char write_identity_file =
978
+ rocksdb_options_get_write_identity_file(options_dbid_in_manifest);
979
+ CheckCondition(write_identity_file);
980
+ rocksdb_options_set_write_identity_file(options_dbid_in_manifest, false);
981
+ write_identity_file =
982
+ rocksdb_options_get_write_identity_file(options_dbid_in_manifest);
983
+ CheckCondition(!write_identity_file);
984
+
974
985
  db = rocksdb_open(options_dbid_in_manifest, dbbackupname, &err);
975
986
  CheckNoError(err);
976
987
 
@@ -1201,8 +1201,10 @@ Status ColumnFamilyData::RangesOverlapWithMemtables(
1201
1201
  read_opts.total_order_seek = true;
1202
1202
  MergeIteratorBuilder merge_iter_builder(&internal_comparator_, &arena);
1203
1203
  merge_iter_builder.AddIterator(super_version->mem->NewIterator(
1204
- read_opts, /*seqno_to_time_mapping=*/nullptr, &arena));
1204
+ read_opts, /*seqno_to_time_mapping=*/nullptr, &arena,
1205
+ /*prefix_extractor=*/nullptr));
1205
1206
  super_version->imm->AddIterators(read_opts, /*seqno_to_time_mapping=*/nullptr,
1207
+ /*prefix_extractor=*/nullptr,
1206
1208
  &merge_iter_builder,
1207
1209
  false /* add_range_tombstone_iter */);
1208
1210
  ScopedArenaPtr<InternalIterator> memtable_iter(merge_iter_builder.Finish());
@@ -1565,28 +1567,6 @@ Status ColumnFamilyData::SetOptions(
1565
1567
  return s;
1566
1568
  }
1567
1569
 
1568
- // REQUIRES: DB mutex held
1569
- Env::WriteLifeTimeHint ColumnFamilyData::CalculateSSTWriteHint(int level) {
1570
- if (initial_cf_options_.compaction_style != kCompactionStyleLevel) {
1571
- return Env::WLTH_NOT_SET;
1572
- }
1573
- if (level == 0) {
1574
- return Env::WLTH_MEDIUM;
1575
- }
1576
- int base_level = current_->storage_info()->base_level();
1577
-
1578
- // L1: medium, L2: long, ...
1579
- if (level - base_level >= 2) {
1580
- return Env::WLTH_EXTREME;
1581
- } else if (level < base_level) {
1582
- // There is no restriction which prevents level passed in to be smaller
1583
- // than base_level.
1584
- return Env::WLTH_MEDIUM;
1585
- }
1586
- return static_cast<Env::WriteLifeTimeHint>(
1587
- level - base_level + static_cast<int>(Env::WLTH_MEDIUM));
1588
- }
1589
-
1590
1570
  Status ColumnFamilyData::AddDirectories(
1591
1571
  std::map<std::string, std::shared_ptr<FSDirectory>>* created_dirs) {
1592
1572
  Status s;
@@ -1652,6 +1632,9 @@ bool ColumnFamilyData::ShouldPostponeFlushToRetainUDT(
1652
1632
  }
1653
1633
  for (const Slice& table_newest_udt :
1654
1634
  imm()->GetTablesNewestUDT(max_memtable_id)) {
1635
+ if (table_newest_udt.empty()) {
1636
+ continue;
1637
+ }
1655
1638
  assert(table_newest_udt.size() == full_history_ts_low.size());
1656
1639
  // Checking the newest UDT contained in MemTable with ascending ID up to
1657
1640
  // `max_memtable_id`. Return immediately on finding the first MemTable that
@@ -401,6 +401,7 @@ class ColumnFamilyData {
401
401
  SequenceNumber earliest_seq);
402
402
 
403
403
  TableCache* table_cache() const { return table_cache_.get(); }
404
+ BlobFileCache* blob_file_cache() const { return blob_file_cache_.get(); }
404
405
  BlobSource* blob_source() const { return blob_source_.get(); }
405
406
 
406
407
  // See documentation in compaction_picker.h
@@ -511,8 +512,6 @@ class ColumnFamilyData {
511
512
  return initial_cf_options_;
512
513
  }
513
514
 
514
- Env::WriteLifeTimeHint CalculateSSTWriteHint(int level);
515
-
516
515
  // created_dirs remembers directory created, so that we don't need to call
517
516
  // the same data creation operation again.
518
517
  Status AddDirectories(
@@ -686,12 +686,11 @@ bool Compaction::KeyRangeNotExistsBeyondOutputLevel(
686
686
  };
687
687
 
688
688
  // Mark (or clear) each file that is being compacted
689
- void Compaction::MarkFilesBeingCompacted(bool mark_as_compacted) {
689
+ void Compaction::MarkFilesBeingCompacted(bool being_compacted) const {
690
690
  for (size_t i = 0; i < num_input_levels(); i++) {
691
691
  for (size_t j = 0; j < inputs_[i].size(); j++) {
692
- assert(mark_as_compacted ? !inputs_[i][j]->being_compacted
693
- : inputs_[i][j]->being_compacted);
694
- inputs_[i][j]->being_compacted = mark_as_compacted;
692
+ assert(being_compacted != inputs_[i][j]->being_compacted);
693
+ inputs_[i][j]->being_compacted = being_compacted;
695
694
  }
696
695
  }
697
696
  }
@@ -735,7 +734,7 @@ uint64_t Compaction::CalculateTotalInputSize() const {
735
734
  return size;
736
735
  }
737
736
 
738
- void Compaction::ReleaseCompactionFiles(Status status) {
737
+ void Compaction::ReleaseCompactionFiles(const Status& status) {
739
738
  MarkFilesBeingCompacted(false);
740
739
  cfd_->compaction_picker()->ReleaseCompactionFiles(this, status);
741
740
  }
@@ -230,7 +230,7 @@ class Compaction {
230
230
  // Delete this compaction from the list of running compactions.
231
231
  //
232
232
  // Requirement: DB mutex held
233
- void ReleaseCompactionFiles(Status status);
233
+ void ReleaseCompactionFiles(const Status& status);
234
234
 
235
235
  // Returns the summary of the compaction in "output" with maximum "len"
236
236
  // in bytes. The caller is responsible for the memory management of
@@ -435,13 +435,13 @@ class Compaction {
435
435
  const int start_level,
436
436
  const int output_level);
437
437
 
438
+ // mark (or clear) all files that are being compacted
439
+ void MarkFilesBeingCompacted(bool being_compacted) const;
440
+
438
441
  private:
439
442
 
440
443
  Status InitInputTableProperties();
441
444
 
442
- // mark (or clear) all files that are being compacted
443
- void MarkFilesBeingCompacted(bool mark_as_compacted);
444
-
445
445
  // get the smallest and largest key present in files to be compacted
446
446
  static void GetBoundaryKeys(VersionStorageInfo* vstorage,
447
447
  const std::vector<CompactionInputFiles>& inputs,
@@ -251,12 +251,13 @@ void CompactionJob::Prepare() {
251
251
 
252
252
  // Generate file_levels_ for compaction before making Iterator
253
253
  auto* c = compact_->compaction;
254
- ColumnFamilyData* cfd = c->column_family_data();
254
+ [[maybe_unused]] ColumnFamilyData* cfd = c->column_family_data();
255
255
  assert(cfd != nullptr);
256
- assert(cfd->current()->storage_info()->NumLevelFiles(
257
- compact_->compaction->level()) > 0);
256
+ const VersionStorageInfo* storage_info = c->input_version()->storage_info();
257
+ assert(storage_info);
258
+ assert(storage_info->NumLevelFiles(compact_->compaction->level()) > 0);
258
259
 
259
- write_hint_ = cfd->CalculateSSTWriteHint(c->output_level());
260
+ write_hint_ = storage_info->CalculateSSTWriteHint(c->output_level());
260
261
  bottommost_level_ = c->bottommost_level();
261
262
 
262
263
  if (c->ShouldFormSubcompactions()) {
@@ -297,8 +298,8 @@ void CompactionJob::Prepare() {
297
298
  for (const auto& each_level : *c->inputs()) {
298
299
  for (const auto& fmd : each_level.files) {
299
300
  std::shared_ptr<const TableProperties> tp;
300
- Status s =
301
- cfd->current()->GetTableProperties(read_options, &tp, fmd, nullptr);
301
+ Status s = c->input_version()->GetTableProperties(read_options, &tp,
302
+ fmd, nullptr);
302
303
  if (s.ok()) {
303
304
  s = seqno_to_time_mapping_.DecodeFrom(tp->seqno_to_time_mapping);
304
305
  }
@@ -805,6 +806,12 @@ Status CompactionJob::Run() {
805
806
  }
806
807
  }
807
808
 
809
+ // Before the compaction starts, is_remote_compaction was set to true if
810
+ // compaction_service is set. We now know whether each sub_compaction was
811
+ // done remotely or not. Reset is_remote_compaction back to false and allow
812
+ // AggregateCompactionStats() to set the right value.
813
+ compaction_job_stats_->is_remote_compaction = false;
814
+
808
815
  // Finish up all bookkeeping to unify the subcompaction results.
809
816
  compact_->AggregateCompactionStats(compaction_stats_, *compaction_job_stats_);
810
817
  uint64_t num_input_range_del = 0;
@@ -1083,6 +1090,7 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
1083
1090
  }
1084
1091
  // fallback to local compaction
1085
1092
  assert(comp_status == CompactionServiceJobStatus::kUseLocal);
1093
+ sub_compact->compaction_job_stats.is_remote_compaction = false;
1086
1094
  }
1087
1095
 
1088
1096
  uint64_t prev_cpu_micros = db_options_.clock->CPUMicros();
@@ -209,6 +209,8 @@ class CompactionJob {
209
209
  // Returns true iff compaction_stats_.stats.num_input_records and
210
210
  // num_input_range_del are calculated successfully.
211
211
  bool UpdateCompactionStats(uint64_t* num_input_range_del = nullptr);
212
+ virtual void UpdateCompactionJobStats(
213
+ const InternalStats::CompactionStats& stats) const;
212
214
  void LogCompaction();
213
215
  virtual void RecordCompactionIOStats();
214
216
  void CleanupCompaction();
@@ -279,8 +281,7 @@ class CompactionJob {
279
281
  bool* compaction_released);
280
282
  Status OpenCompactionOutputFile(SubcompactionState* sub_compact,
281
283
  CompactionOutputs& outputs);
282
- void UpdateCompactionJobStats(
283
- const InternalStats::CompactionStats& stats) const;
284
+
284
285
  void RecordDroppedKeys(const CompactionIterationStats& c_iter_stats,
285
286
  CompactionJobStats* compaction_job_stats = nullptr);
286
287
 
@@ -377,9 +378,7 @@ class CompactionJob {
377
378
  // doesn't contain the LSM tree information, which is passed though MANIFEST
378
379
  // file.
379
380
  struct CompactionServiceInput {
380
- ColumnFamilyDescriptor column_family;
381
-
382
- DBOptions db_options;
381
+ std::string cf_name;
383
382
 
384
383
  std::vector<SequenceNumber> snapshots;
385
384
 
@@ -398,13 +397,12 @@ struct CompactionServiceInput {
398
397
  bool has_end = false;
399
398
  std::string end;
400
399
 
400
+ uint64_t options_file_number;
401
+
401
402
  // serialization interface to read and write the object
402
403
  static Status Read(const std::string& data_str, CompactionServiceInput* obj);
403
404
  Status Write(std::string* output);
404
405
 
405
- // Initialize a dummy ColumnFamilyDescriptor
406
- CompactionServiceInput() : column_family("", ColumnFamilyOptions()) {}
407
-
408
406
  #ifndef NDEBUG
409
407
  bool TEST_Equals(CompactionServiceInput* other);
410
408
  bool TEST_Equals(CompactionServiceInput* other, std::string* mismatch);
@@ -418,19 +416,22 @@ struct CompactionServiceOutputFile {
418
416
  SequenceNumber largest_seqno;
419
417
  std::string smallest_internal_key;
420
418
  std::string largest_internal_key;
421
- uint64_t oldest_ancester_time;
422
- uint64_t file_creation_time;
423
- uint64_t epoch_number;
419
+ uint64_t oldest_ancester_time = kUnknownOldestAncesterTime;
420
+ uint64_t file_creation_time = kUnknownFileCreationTime;
421
+ uint64_t epoch_number = kUnknownEpochNumber;
422
+ std::string file_checksum = kUnknownFileChecksum;
423
+ std::string file_checksum_func_name = kUnknownFileChecksumFuncName;
424
424
  uint64_t paranoid_hash;
425
425
  bool marked_for_compaction;
426
- UniqueId64x2 unique_id;
426
+ UniqueId64x2 unique_id{};
427
427
 
428
428
  CompactionServiceOutputFile() = default;
429
429
  CompactionServiceOutputFile(
430
430
  const std::string& name, SequenceNumber smallest, SequenceNumber largest,
431
431
  std::string _smallest_internal_key, std::string _largest_internal_key,
432
432
  uint64_t _oldest_ancester_time, uint64_t _file_creation_time,
433
- uint64_t _epoch_number, uint64_t _paranoid_hash,
433
+ uint64_t _epoch_number, const std::string& _file_checksum,
434
+ const std::string& _file_checksum_func_name, uint64_t _paranoid_hash,
434
435
  bool _marked_for_compaction, UniqueId64x2 _unique_id)
435
436
  : file_name(name),
436
437
  smallest_seqno(smallest),
@@ -440,6 +441,8 @@ struct CompactionServiceOutputFile {
440
441
  oldest_ancester_time(_oldest_ancester_time),
441
442
  file_creation_time(_file_creation_time),
442
443
  epoch_number(_epoch_number),
444
+ file_checksum(_file_checksum),
445
+ file_checksum_func_name(_file_checksum_func_name),
443
446
  paranoid_hash(_paranoid_hash),
444
447
  marked_for_compaction(_marked_for_compaction),
445
448
  unique_id(std::move(_unique_id)) {}
@@ -456,9 +459,6 @@ struct CompactionServiceResult {
456
459
  // location of the output files
457
460
  std::string output_path;
458
461
 
459
- // some statistics about the compaction
460
- uint64_t num_output_records = 0;
461
- uint64_t total_bytes = 0;
462
462
  uint64_t bytes_read = 0;
463
463
  uint64_t bytes_written = 0;
464
464
  CompactionJobStats stats;
@@ -504,6 +504,9 @@ class CompactionServiceCompactionJob : private CompactionJob {
504
504
  protected:
505
505
  void RecordCompactionIOStats() override;
506
506
 
507
+ void UpdateCompactionJobStats(
508
+ const InternalStats::CompactionStats& stats) const override;
509
+
507
510
  private:
508
511
  // Get table file name in output_path
509
512
  std::string GetTableFileName(uint64_t file_number) override;
@@ -50,7 +50,8 @@ void VerifyInitializationOfCompactionJobStats(
50
50
  ASSERT_EQ(compaction_job_stats.num_output_records, 0U);
51
51
  ASSERT_EQ(compaction_job_stats.num_output_files, 0U);
52
52
 
53
- ASSERT_EQ(compaction_job_stats.is_manual_compaction, true);
53
+ ASSERT_TRUE(compaction_job_stats.is_manual_compaction);
54
+ ASSERT_FALSE(compaction_job_stats.is_remote_compaction);
54
55
 
55
56
  ASSERT_EQ(compaction_job_stats.total_input_bytes, 0U);
56
57
  ASSERT_EQ(compaction_job_stats.total_output_bytes, 0U);
@@ -545,15 +546,14 @@ class CompactionJobTestBase : public testing::Test {
545
546
  ASSERT_OK(s);
546
547
  db_options_.info_log = info_log;
547
548
 
548
- versions_.reset(new VersionSet(
549
- dbname_, &db_options_, env_options_, table_cache_.get(),
550
- &write_buffer_manager_, &write_controller_,
551
- /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
552
- /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"",
553
- /*error_handler=*/nullptr, /*read_only=*/false));
549
+ versions_.reset(
550
+ new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
551
+ &write_buffer_manager_, &write_controller_,
552
+ /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
553
+ test::kUnitTestDbId, /*db_session_id=*/"",
554
+ /*daily_offpeak_time_utc=*/"",
555
+ /*error_handler=*/nullptr, /*read_only=*/false));
554
556
  compaction_job_stats_.Reset();
555
- ASSERT_OK(
556
- SetIdentityFile(WriteOptions(), env_, dbname_, Temperature::kUnknown));
557
557
 
558
558
  VersionEdit new_db;
559
559
  new_db.SetLogNumber(0);
@@ -1569,17 +1569,7 @@ TEST_F(CompactionJobTest, InputSerialization) {
1569
1569
  const int kStrMaxLen = 1000;
1570
1570
  Random rnd(static_cast<uint32_t>(time(nullptr)));
1571
1571
  Random64 rnd64(time(nullptr));
1572
- input.column_family.name = rnd.RandomString(rnd.Uniform(kStrMaxLen));
1573
- input.column_family.options.comparator = ReverseBytewiseComparator();
1574
- input.column_family.options.max_bytes_for_level_base =
1575
- rnd64.Uniform(UINT64_MAX);
1576
- input.column_family.options.disable_auto_compactions = rnd.OneIn(2);
1577
- input.column_family.options.compression = kZSTD;
1578
- input.column_family.options.compression_opts.level = 4;
1579
- input.db_options.max_background_flushes = 10;
1580
- input.db_options.paranoid_checks = rnd.OneIn(2);
1581
- input.db_options.statistics = CreateDBStatistics();
1582
- input.db_options.env = env_;
1572
+ input.cf_name = rnd.RandomString(rnd.Uniform(kStrMaxLen));
1583
1573
  while (!rnd.OneIn(10)) {
1584
1574
  input.snapshots.emplace_back(rnd64.Uniform(UINT64_MAX));
1585
1575
  }
@@ -1607,10 +1597,10 @@ TEST_F(CompactionJobTest, InputSerialization) {
1607
1597
  ASSERT_TRUE(deserialized1.TEST_Equals(&input));
1608
1598
 
1609
1599
  // Test mismatch
1610
- deserialized1.db_options.max_background_flushes += 10;
1600
+ deserialized1.output_level += 10;
1611
1601
  std::string mismatch;
1612
1602
  ASSERT_FALSE(deserialized1.TEST_Equals(&input, &mismatch));
1613
- ASSERT_EQ(mismatch, "db_options.max_background_flushes");
1603
+ ASSERT_EQ(mismatch, "output_level");
1614
1604
 
1615
1605
  // Test unknown field
1616
1606
  CompactionServiceInput deserialized2;
@@ -1666,20 +1656,30 @@ TEST_F(CompactionJobTest, ResultSerialization) {
1666
1656
  };
1667
1657
  result.status =
1668
1658
  status_list.at(rnd.Uniform(static_cast<int>(status_list.size())));
1659
+
1660
+ std::string file_checksum = rnd.RandomBinaryString(rnd.Uniform(kStrMaxLen));
1661
+ std::string file_checksum_func_name = "MyAwesomeChecksumGenerator";
1669
1662
  while (!rnd.OneIn(10)) {
1670
1663
  UniqueId64x2 id{rnd64.Uniform(UINT64_MAX), rnd64.Uniform(UINT64_MAX)};
1671
1664
  result.output_files.emplace_back(
1672
- rnd.RandomString(rnd.Uniform(kStrMaxLen)), rnd64.Uniform(UINT64_MAX),
1673
- rnd64.Uniform(UINT64_MAX),
1674
- rnd.RandomBinaryString(rnd.Uniform(kStrMaxLen)),
1675
- rnd.RandomBinaryString(rnd.Uniform(kStrMaxLen)),
1676
- rnd64.Uniform(UINT64_MAX), rnd64.Uniform(UINT64_MAX),
1677
- rnd64.Uniform(UINT64_MAX), rnd64.Uniform(UINT64_MAX), rnd.OneIn(2), id);
1665
+ rnd.RandomString(rnd.Uniform(kStrMaxLen)) /* file_name */,
1666
+ rnd64.Uniform(UINT64_MAX) /* smallest_seqno */,
1667
+ rnd64.Uniform(UINT64_MAX) /* largest_seqno */,
1668
+ rnd.RandomBinaryString(
1669
+ rnd.Uniform(kStrMaxLen)) /* smallest_internal_key */,
1670
+ rnd.RandomBinaryString(
1671
+ rnd.Uniform(kStrMaxLen)) /* largest_internal_key */,
1672
+ rnd64.Uniform(UINT64_MAX) /* oldest_ancester_time */,
1673
+ rnd64.Uniform(UINT64_MAX) /* file_creation_time */,
1674
+ rnd64.Uniform(UINT64_MAX) /* epoch_number */,
1675
+ file_checksum /* file_checksum */,
1676
+ file_checksum_func_name /* file_checksum_func_name */,
1677
+ rnd64.Uniform(UINT64_MAX) /* paranoid_hash */,
1678
+ rnd.OneIn(2) /* marked_for_compaction */, id);
1678
1679
  }
1679
1680
  result.output_level = rnd.Uniform(10);
1680
1681
  result.output_path = rnd.RandomString(rnd.Uniform(kStrMaxLen));
1681
- result.num_output_records = rnd64.Uniform(UINT64_MAX);
1682
- result.total_bytes = rnd64.Uniform(UINT64_MAX);
1682
+ result.stats.num_output_records = rnd64.Uniform(UINT64_MAX);
1683
1683
  result.bytes_read = 123;
1684
1684
  result.bytes_written = rnd64.Uniform(UINT64_MAX);
1685
1685
  result.stats.elapsed_micros = rnd64.Uniform(UINT64_MAX);
@@ -1710,6 +1710,10 @@ TEST_F(CompactionJobTest, ResultSerialization) {
1710
1710
  ASSERT_FALSE(deserialized_tmp.TEST_Equals(&result, &mismatch));
1711
1711
  ASSERT_EQ(mismatch, "output_files.unique_id");
1712
1712
  deserialized_tmp.status.PermitUncheckedError();
1713
+
1714
+ ASSERT_EQ(deserialized_tmp.output_files[0].file_checksum, file_checksum);
1715
+ ASSERT_EQ(deserialized_tmp.output_files[0].file_checksum_func_name,
1716
+ file_checksum_func_name);
1713
1717
  }
1714
1718
 
1715
1719
  // Test unknown field
@@ -62,8 +62,9 @@ class CompactionOutputs {
62
62
  }
63
63
 
64
64
  // TODO: Remove it when remote compaction support tiered compaction
65
- void SetTotalBytes(uint64_t bytes) { stats_.bytes_written += bytes; }
65
+ void AddBytesWritten(uint64_t bytes) { stats_.bytes_written += bytes; }
66
66
  void SetNumOutputRecords(uint64_t num) { stats_.num_output_records = num; }
67
+ void SetNumOutputFiles(uint64_t num) { stats_.num_output_files = num; }
67
68
 
68
69
  // TODO: Move the BlobDB builder into CompactionOutputs
69
70
  const std::vector<BlobFileAddition>& GetBlobFileAdditions() const {
@@ -133,7 +133,8 @@ CompactionPicker::CompactionPicker(const ImmutableOptions& ioptions,
133
133
  CompactionPicker::~CompactionPicker() = default;
134
134
 
135
135
  // Delete this compaction from the list of running compactions.
136
- void CompactionPicker::ReleaseCompactionFiles(Compaction* c, Status status) {
136
+ void CompactionPicker::ReleaseCompactionFiles(Compaction* c,
137
+ const Status& status) {
137
138
  UnregisterCompaction(c);
138
139
  if (!status.ok()) {
139
140
  c->ResetNextCompactionIndex();
@@ -104,7 +104,7 @@ class CompactionPicker {
104
104
  // Free up the files that participated in a compaction
105
105
  //
106
106
  // Requirement: DB mutex held
107
- void ReleaseCompactionFiles(Compaction* c, Status status);
107
+ void ReleaseCompactionFiles(Compaction* c, const Status& status);
108
108
 
109
109
  // Returns true if any one of the specified files are being compacted
110
110
  bool AreFilesInCompaction(const std::vector<FileMetaData*>& files);
@@ -294,7 +294,7 @@ Compaction* FIFOCompactionPicker::PickSizeCompaction(
294
294
  Compaction* FIFOCompactionPicker::PickTemperatureChangeCompaction(
295
295
  const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
296
296
  const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
297
- LogBuffer* log_buffer) {
297
+ LogBuffer* log_buffer) const {
298
298
  const std::vector<FileTemperatureAge>& ages =
299
299
  mutable_cf_options.compaction_options_fifo
300
300
  .file_temperature_age_thresholds;
@@ -344,12 +344,10 @@ Compaction* FIFOCompactionPicker::PickTemperatureChangeCompaction(
344
344
  Temperature compaction_target_temp = Temperature::kLastTemperature;
345
345
  if (current_time > min_age) {
346
346
  uint64_t create_time_threshold = current_time - min_age;
347
- uint64_t compaction_size = 0;
348
347
  // We will ideally identify a file qualifying for temperature change by
349
348
  // knowing the timestamp for the youngest entry in the file. However, right
350
349
  // now we don't have the information. We infer it by looking at timestamp of
351
350
  // the previous file's (which is just younger) oldest entry's timestamp.
352
- Temperature cur_target_temp;
353
351
  // avoid index underflow
354
352
  assert(level_files.size() >= 1);
355
353
  for (size_t index = level_files.size() - 1; index >= 1; --index) {
@@ -374,7 +372,7 @@ Compaction* FIFOCompactionPicker::PickTemperatureChangeCompaction(
374
372
  // cur_file is too fresh
375
373
  break;
376
374
  }
377
- cur_target_temp = ages[0].temperature;
375
+ Temperature cur_target_temp = ages[0].temperature;
378
376
  for (size_t i = 1; i < ages.size(); ++i) {
379
377
  if (current_time >= ages[i].age &&
380
378
  oldest_ancestor_time <= current_time - ages[i].age) {
@@ -382,35 +380,20 @@ Compaction* FIFOCompactionPicker::PickTemperatureChangeCompaction(
382
380
  }
383
381
  }
384
382
  if (cur_file->temperature == cur_target_temp) {
385
- if (inputs[0].empty()) {
386
- continue;
387
- } else {
388
- break;
389
- }
383
+ continue;
390
384
  }
391
385
 
392
386
  // cur_file needs to change temperature
393
- if (compaction_target_temp == Temperature::kLastTemperature) {
394
- assert(inputs[0].empty());
395
- compaction_target_temp = cur_target_temp;
396
- } else if (cur_target_temp != compaction_target_temp) {
397
- assert(!inputs[0].empty());
398
- break;
399
- }
400
- if (inputs[0].empty() || compaction_size + cur_file->fd.GetFileSize() <=
401
- mutable_cf_options.max_compaction_bytes) {
402
- inputs[0].files.push_back(cur_file);
403
- compaction_size += cur_file->fd.GetFileSize();
404
- ROCKS_LOG_BUFFER(
405
- log_buffer,
406
- "[%s] FIFO compaction: picking file %" PRIu64
407
- " with next file's oldest time %" PRIu64 " for temperature %s.",
408
- cf_name.c_str(), cur_file->fd.GetNumber(), oldest_ancestor_time,
409
- temperature_to_string[cur_target_temp].c_str());
410
- }
411
- if (compaction_size > mutable_cf_options.max_compaction_bytes) {
412
- break;
413
- }
387
+ assert(compaction_target_temp == Temperature::kLastTemperature);
388
+ compaction_target_temp = cur_target_temp;
389
+ inputs[0].files.push_back(cur_file);
390
+ ROCKS_LOG_BUFFER(
391
+ log_buffer,
392
+ "[%s] FIFO compaction: picking file %" PRIu64
393
+ " with next file's oldest time %" PRIu64 " for temperature %s.",
394
+ cf_name.c_str(), cur_file->fd.GetNumber(), oldest_ancestor_time,
395
+ temperature_to_string[cur_target_temp].c_str());
396
+ break;
414
397
  }
415
398
  }
416
399
 
@@ -418,7 +401,9 @@ Compaction* FIFOCompactionPicker::PickTemperatureChangeCompaction(
418
401
  return nullptr;
419
402
  }
420
403
  assert(compaction_target_temp != Temperature::kLastTemperature);
421
-
404
+ // Only compact one file at a time.
405
+ assert(inputs.size() == 1);
406
+ assert(inputs[0].size() == 1);
422
407
  Compaction* c = new Compaction(
423
408
  vstorage, ioptions_, mutable_cf_options, mutable_db_options,
424
409
  std::move(inputs), 0, 0 /* output file size limit */,
@@ -53,9 +53,10 @@ class FIFOCompactionPicker : public CompactionPicker {
53
53
  VersionStorageInfo* version,
54
54
  LogBuffer* log_buffer);
55
55
 
56
+ // Will pick one file to compact at a time, starting from the oldest file.
56
57
  Compaction* PickTemperatureChangeCompaction(
57
58
  const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
58
59
  const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
59
- LogBuffer* log_buffer);
60
+ LogBuffer* log_buffer) const;
60
61
  };
61
62
  } // namespace ROCKSDB_NAMESPACE