@nxtedition/rocksdb 8.1.4 → 8.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223) hide show
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +21 -0
  2. package/deps/rocksdb/rocksdb/Makefile +15 -3
  3. package/deps/rocksdb/rocksdb/TARGETS +6 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +32 -35
  5. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +0 -30
  6. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.h +0 -83
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +13 -14
  8. package/deps/rocksdb/rocksdb/cache/cache_helpers.cc +40 -0
  9. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +14 -20
  10. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +8 -9
  11. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +5 -4
  12. package/deps/rocksdb/rocksdb/cache/cache_test.cc +124 -156
  13. package/deps/rocksdb/rocksdb/cache/charged_cache.cc +10 -26
  14. package/deps/rocksdb/rocksdb/cache/charged_cache.h +11 -16
  15. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +35 -32
  16. package/deps/rocksdb/rocksdb/cache/clock_cache.h +19 -21
  17. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +42 -30
  18. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +9 -8
  19. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +91 -143
  20. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +54 -60
  21. package/deps/rocksdb/rocksdb/cache/lru_cache.h +37 -63
  22. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +120 -106
  23. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +14 -5
  24. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +16 -31
  25. package/deps/rocksdb/rocksdb/cache/typed_cache.h +339 -0
  26. package/deps/rocksdb/rocksdb/db/blob/blob_contents.cc +0 -48
  27. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +18 -15
  28. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +5 -26
  29. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +7 -8
  30. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +6 -3
  31. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +2 -7
  32. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +19 -47
  33. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +13 -5
  34. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +15 -22
  35. package/deps/rocksdb/rocksdb/db/builder.cc +24 -10
  36. package/deps/rocksdb/rocksdb/db/builder.h +2 -1
  37. package/deps/rocksdb/rocksdb/db/c.cc +15 -0
  38. package/deps/rocksdb/rocksdb/db/c_test.c +3 -0
  39. package/deps/rocksdb/rocksdb/db/column_family.cc +11 -6
  40. package/deps/rocksdb/rocksdb/db/column_family.h +20 -6
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +31 -34
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +3 -0
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +21 -3
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +1 -0
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +4 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +4 -2
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +9 -6
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +275 -82
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -18
  51. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +17 -16
  52. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +19 -6
  53. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +5 -5
  54. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +22 -22
  55. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +5 -5
  56. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +81 -52
  57. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -1
  58. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +5 -5
  59. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +8 -2
  60. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +3 -0
  61. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +266 -138
  62. package/deps/rocksdb/rocksdb/db/corruption_test.cc +86 -1
  63. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +98 -9
  64. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +28 -28
  65. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +2 -3
  66. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +1022 -123
  67. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +65 -4
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +32 -21
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +32 -24
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +199 -77
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +1 -1
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +3 -2
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -0
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +8 -4
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +43 -23
  76. package/deps/rocksdb/rocksdb/db/db_iter.cc +8 -2
  77. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +42 -0
  78. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +155 -0
  79. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +12 -12
  80. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +230 -2
  81. package/deps/rocksdb/rocksdb/db/db_test.cc +3 -0
  82. package/deps/rocksdb/rocksdb/db/db_test2.cc +233 -8
  83. package/deps/rocksdb/rocksdb/db/db_test_util.cc +11 -10
  84. package/deps/rocksdb/rocksdb/db/db_test_util.h +39 -24
  85. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +129 -0
  86. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +28 -0
  87. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +21 -0
  88. package/deps/rocksdb/rocksdb/db/dbformat.cc +25 -0
  89. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -0
  90. package/deps/rocksdb/rocksdb/db/experimental.cc +3 -2
  91. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +3 -0
  92. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +92 -13
  93. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +38 -1
  94. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +14 -110
  95. package/deps/rocksdb/rocksdb/db/flush_job.cc +12 -10
  96. package/deps/rocksdb/rocksdb/db/flush_job.h +3 -2
  97. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +29 -29
  98. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +56 -53
  99. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +3 -4
  100. package/deps/rocksdb/rocksdb/db/internal_stats.cc +11 -11
  101. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -2
  102. package/deps/rocksdb/rocksdb/db/log_reader.cc +8 -6
  103. package/deps/rocksdb/rocksdb/db/log_test.cc +35 -2
  104. package/deps/rocksdb/rocksdb/db/memtable.cc +31 -6
  105. package/deps/rocksdb/rocksdb/db/merge_helper.cc +47 -29
  106. package/deps/rocksdb/rocksdb/db/merge_helper.h +14 -6
  107. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +10 -10
  108. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/repair.cc +65 -22
  110. package/deps/rocksdb/rocksdb/db/repair_test.cc +54 -0
  111. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +26 -26
  112. package/deps/rocksdb/rocksdb/db/table_cache.cc +41 -91
  113. package/deps/rocksdb/rocksdb/db/table_cache.h +17 -19
  114. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -9
  115. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -1
  116. package/deps/rocksdb/rocksdb/db/version_builder.cc +102 -52
  117. package/deps/rocksdb/rocksdb/db/version_builder.h +20 -0
  118. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +218 -93
  119. package/deps/rocksdb/rocksdb/db/version_edit.cc +27 -1
  120. package/deps/rocksdb/rocksdb/db/version_edit.h +34 -9
  121. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +13 -6
  122. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +17 -6
  123. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +19 -17
  124. package/deps/rocksdb/rocksdb/db/version_set.cc +160 -28
  125. package/deps/rocksdb/rocksdb/db/version_set.h +34 -4
  126. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -1
  127. package/deps/rocksdb/rocksdb/db/version_set_test.cc +65 -31
  128. package/deps/rocksdb/rocksdb/db/write_batch.cc +4 -1
  129. package/deps/rocksdb/rocksdb/db/write_thread.cc +5 -2
  130. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -0
  131. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -32
  132. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.h +2 -1
  133. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +8 -6
  134. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
  135. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +11 -4
  136. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +16 -15
  137. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +13 -1
  138. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -0
  139. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +286 -217
  140. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +8 -0
  141. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +137 -135
  142. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
  143. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +7 -1
  144. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +21 -0
  145. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +9 -3
  146. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +2 -1
  147. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +8 -6
  148. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +3 -0
  149. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +69 -9
  150. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  151. package/deps/rocksdb/rocksdb/memory/arena.cc +23 -87
  152. package/deps/rocksdb/rocksdb/memory/arena.h +25 -31
  153. package/deps/rocksdb/rocksdb/memory/arena_test.cc +90 -0
  154. package/deps/rocksdb/rocksdb/memory/memory_allocator.h +9 -0
  155. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +26 -26
  156. package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -3
  157. package/deps/rocksdb/rocksdb/port/mmap.cc +98 -0
  158. package/deps/rocksdb/rocksdb/port/mmap.h +70 -0
  159. package/deps/rocksdb/rocksdb/port/port_posix.h +2 -0
  160. package/{prebuilds → deps/rocksdb/rocksdb/prebuilds}/linux-x64/node.napi.node +0 -0
  161. package/deps/rocksdb/rocksdb/src.mk +3 -0
  162. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -2
  163. package/deps/rocksdb/rocksdb/table/block_based/block.h +3 -0
  164. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +25 -67
  165. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +3 -3
  166. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +18 -13
  167. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +159 -225
  168. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +31 -50
  169. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +52 -20
  170. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +3 -3
  171. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +1 -1
  172. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +96 -0
  173. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +132 -0
  174. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +28 -0
  175. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -5
  176. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +1 -4
  177. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +6 -7
  178. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +3 -1
  179. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +6 -1
  180. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +19 -18
  181. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +9 -5
  182. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +3 -1
  183. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +2 -1
  184. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +2 -2
  185. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -3
  186. package/deps/rocksdb/rocksdb/table/format.cc +24 -20
  187. package/deps/rocksdb/rocksdb/table/format.h +6 -3
  188. package/deps/rocksdb/rocksdb/table/get_context.cc +12 -3
  189. package/deps/rocksdb/rocksdb/table/internal_iterator.h +0 -2
  190. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +69 -35
  191. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +2 -2
  192. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
  193. package/deps/rocksdb/rocksdb/table/table_test.cc +7 -6
  194. package/deps/rocksdb/rocksdb/test_util/testutil.h +10 -0
  195. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +66 -1
  196. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +9 -2
  197. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +5 -0
  198. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +2 -2
  199. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +1 -1
  200. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +20 -12
  201. package/deps/rocksdb/rocksdb/util/bloom_test.cc +1 -1
  202. package/deps/rocksdb/rocksdb/util/compression.cc +2 -2
  203. package/deps/rocksdb/rocksdb/util/compression.h +11 -2
  204. package/deps/rocksdb/rocksdb/util/status.cc +7 -0
  205. package/deps/rocksdb/rocksdb/util/xxhash.h +1901 -887
  206. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +250 -74
  207. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +199 -4
  208. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +35 -57
  209. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +4 -5
  210. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -0
  211. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +39 -0
  212. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +9 -0
  213. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +11 -6
  214. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +6 -5
  215. package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +0 -1
  216. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +10 -11
  217. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +31 -31
  218. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +4 -0
  219. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +111 -0
  220. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +1 -0
  221. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +12 -3
  222. package/package.json +1 -1
  223. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +0 -182
@@ -155,7 +155,7 @@ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context,
155
155
 
156
156
  Status DBImpl::FlushMemTableToOutputFile(
157
157
  ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options,
158
- bool* made_progress, JobContext* job_context,
158
+ bool* made_progress, JobContext* job_context, FlushReason flush_reason,
159
159
  SuperVersionContext* superversion_context,
160
160
  std::vector<SequenceNumber>& snapshot_seqs,
161
161
  SequenceNumber earliest_write_conflict_snapshot,
@@ -215,7 +215,8 @@ Status DBImpl::FlushMemTableToOutputFile(
215
215
  dbname_, cfd, immutable_db_options_, mutable_cf_options, max_memtable_id,
216
216
  file_options_for_compaction_, versions_.get(), &mutex_, &shutting_down_,
217
217
  snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker,
218
- job_context, log_buffer, directories_.GetDbDir(), GetDataDir(cfd, 0U),
218
+ job_context, flush_reason, log_buffer, directories_.GetDbDir(),
219
+ GetDataDir(cfd, 0U),
219
220
  GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), stats_,
220
221
  &event_logger_, mutable_cf_options.report_bg_io_stats,
221
222
  true /* sync_output_directory */, true /* write_manifest */, thread_pri,
@@ -260,7 +261,8 @@ Status DBImpl::FlushMemTableToOutputFile(
260
261
 
261
262
  #ifndef ROCKSDB_LITE
262
263
  // may temporarily unlock and lock the mutex.
263
- NotifyOnFlushBegin(cfd, &file_meta, mutable_cf_options, job_context->job_id);
264
+ NotifyOnFlushBegin(cfd, &file_meta, mutable_cf_options, job_context->job_id,
265
+ flush_reason);
264
266
  #endif // ROCKSDB_LITE
265
267
 
266
268
  bool switched_to_mempurge = false;
@@ -390,8 +392,9 @@ Status DBImpl::FlushMemTablesToOutputFiles(
390
392
  MutableCFOptions mutable_cf_options_copy = *cfd->GetLatestMutableCFOptions();
391
393
  SuperVersionContext* superversion_context =
392
394
  bg_flush_arg.superversion_context_;
395
+ FlushReason flush_reason = bg_flush_arg.flush_reason_;
393
396
  Status s = FlushMemTableToOutputFile(
394
- cfd, mutable_cf_options_copy, made_progress, job_context,
397
+ cfd, mutable_cf_options_copy, made_progress, job_context, flush_reason,
395
398
  superversion_context, snapshot_seqs, earliest_write_conflict_snapshot,
396
399
  snapshot_checker, log_buffer, thread_pri);
397
400
  return s;
@@ -420,7 +423,9 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
420
423
  for (const auto cfd : cfds) {
421
424
  assert(cfd->imm()->NumNotFlushed() != 0);
422
425
  assert(cfd->imm()->IsFlushPending());
423
- assert(cfd->GetFlushReason() == cfds[0]->GetFlushReason());
426
+ }
427
+ for (const auto bg_flush_arg : bg_flush_args) {
428
+ assert(bg_flush_arg.flush_reason_ == bg_flush_args[0].flush_reason_);
424
429
  }
425
430
  #endif /* !NDEBUG */
426
431
 
@@ -459,13 +464,15 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
459
464
  all_mutable_cf_options.emplace_back(*cfd->GetLatestMutableCFOptions());
460
465
  const MutableCFOptions& mutable_cf_options = all_mutable_cf_options.back();
461
466
  uint64_t max_memtable_id = bg_flush_args[i].max_memtable_id_;
467
+ FlushReason flush_reason = bg_flush_args[i].flush_reason_;
462
468
  jobs.emplace_back(new FlushJob(
463
469
  dbname_, cfd, immutable_db_options_, mutable_cf_options,
464
470
  max_memtable_id, file_options_for_compaction_, versions_.get(), &mutex_,
465
471
  &shutting_down_, snapshot_seqs, earliest_write_conflict_snapshot,
466
- snapshot_checker, job_context, log_buffer, directories_.GetDbDir(),
467
- data_dir, GetCompressionFlush(*cfd->ioptions(), mutable_cf_options),
468
- stats_, &event_logger_, mutable_cf_options.report_bg_io_stats,
472
+ snapshot_checker, job_context, flush_reason, log_buffer,
473
+ directories_.GetDbDir(), data_dir,
474
+ GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), stats_,
475
+ &event_logger_, mutable_cf_options.report_bg_io_stats,
469
476
  false /* sync_output_directory */, false /* write_manifest */,
470
477
  thread_pri, io_tracer_, seqno_time_mapping_, db_id_, db_session_id_,
471
478
  cfd->GetFullHistoryTsLow(), &blob_callback_));
@@ -483,8 +490,9 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
483
490
  for (int i = 0; i != num_cfs; ++i) {
484
491
  const MutableCFOptions& mutable_cf_options = all_mutable_cf_options.at(i);
485
492
  // may temporarily unlock and lock the mutex.
493
+ FlushReason flush_reason = bg_flush_args[i].flush_reason_;
486
494
  NotifyOnFlushBegin(cfds[i], &file_meta[i], mutable_cf_options,
487
- job_context->job_id);
495
+ job_context->job_id, flush_reason);
488
496
  }
489
497
  #endif /* !ROCKSDB_LITE */
490
498
 
@@ -642,8 +650,9 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
642
650
 
643
651
  bool resuming_from_bg_err =
644
652
  error_handler_.IsDBStopped() ||
645
- (cfds[0]->GetFlushReason() == FlushReason::kErrorRecovery ||
646
- cfds[0]->GetFlushReason() == FlushReason::kErrorRecoveryRetryFlush);
653
+ (bg_flush_args[0].flush_reason_ == FlushReason::kErrorRecovery ||
654
+ bg_flush_args[0].flush_reason_ ==
655
+ FlushReason::kErrorRecoveryRetryFlush);
647
656
  while ((!resuming_from_bg_err || error_handler_.GetRecoveryError().ok())) {
648
657
  std::pair<Status, bool> res = wait_to_install_func();
649
658
 
@@ -660,8 +669,9 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
660
669
 
661
670
  resuming_from_bg_err =
662
671
  error_handler_.IsDBStopped() ||
663
- (cfds[0]->GetFlushReason() == FlushReason::kErrorRecovery ||
664
- cfds[0]->GetFlushReason() == FlushReason::kErrorRecoveryRetryFlush);
672
+ (bg_flush_args[0].flush_reason_ == FlushReason::kErrorRecovery ||
673
+ bg_flush_args[0].flush_reason_ ==
674
+ FlushReason::kErrorRecoveryRetryFlush);
665
675
  }
666
676
 
667
677
  if (!resuming_from_bg_err) {
@@ -816,7 +826,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
816
826
 
817
827
  void DBImpl::NotifyOnFlushBegin(ColumnFamilyData* cfd, FileMetaData* file_meta,
818
828
  const MutableCFOptions& mutable_cf_options,
819
- int job_id) {
829
+ int job_id, FlushReason flush_reason) {
820
830
  #ifndef ROCKSDB_LITE
821
831
  if (immutable_db_options_.listeners.size() == 0U) {
822
832
  return;
@@ -849,7 +859,7 @@ void DBImpl::NotifyOnFlushBegin(ColumnFamilyData* cfd, FileMetaData* file_meta,
849
859
  info.triggered_writes_stop = triggered_writes_stop;
850
860
  info.smallest_seqno = file_meta->fd.smallest_seqno;
851
861
  info.largest_seqno = file_meta->fd.largest_seqno;
852
- info.flush_reason = cfd->GetFlushReason();
862
+ info.flush_reason = flush_reason;
853
863
  for (auto listener : immutable_db_options_.listeners) {
854
864
  listener->OnFlushBegin(this, info);
855
865
  }
@@ -862,6 +872,7 @@ void DBImpl::NotifyOnFlushBegin(ColumnFamilyData* cfd, FileMetaData* file_meta,
862
872
  (void)file_meta;
863
873
  (void)mutable_cf_options;
864
874
  (void)job_id;
875
+ (void)flush_reason;
865
876
  #endif // ROCKSDB_LITE
866
877
  }
867
878
 
@@ -1087,6 +1098,22 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
1087
1098
  {
1088
1099
  SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
1089
1100
  Version* current_version = super_version->current;
1101
+
1102
+ // Might need to query the partitioner
1103
+ SstPartitionerFactory* partitioner_factory =
1104
+ current_version->cfd()->ioptions()->sst_partitioner_factory.get();
1105
+ std::unique_ptr<SstPartitioner> partitioner;
1106
+ if (partitioner_factory && begin != nullptr && end != nullptr) {
1107
+ SstPartitioner::Context context;
1108
+ context.is_full_compaction = false;
1109
+ context.is_manual_compaction = true;
1110
+ context.output_level = /*unknown*/ -1;
1111
+ // Small lies about compaction range
1112
+ context.smallest_user_key = *begin;
1113
+ context.largest_user_key = *end;
1114
+ partitioner = partitioner_factory->CreatePartitioner(context);
1115
+ }
1116
+
1090
1117
  ReadOptions ro;
1091
1118
  ro.total_order_seek = true;
1092
1119
  bool overlap;
@@ -1094,14 +1121,50 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
1094
1121
  level < current_version->storage_info()->num_non_empty_levels();
1095
1122
  level++) {
1096
1123
  overlap = true;
1124
+
1125
+ // Whether to look at specific keys within files for overlap with
1126
+ // compaction range, other than largest and smallest keys of the file
1127
+ // known in Version metadata.
1128
+ bool check_overlap_within_file = false;
1097
1129
  if (begin != nullptr && end != nullptr) {
1130
+ // Typically checking overlap within files in this case
1131
+ check_overlap_within_file = true;
1132
+ // WART: Not known why we don't check within file in one-sided bound
1133
+ // cases
1134
+ if (partitioner) {
1135
+ // Especially if the partitioner is new, the manual compaction
1136
+ // might be used to enforce the partitioning. Checking overlap
1137
+ // within files might miss cases where compaction is needed to
1138
+ // partition the files, as in this example:
1139
+ // * File has two keys "001" and "111"
1140
+ // * Compaction range is ["011", "101")
1141
+ // * Partition boundary at "100"
1142
+ // In cases like this, file-level overlap with the compaction
1143
+ // range is sufficient to force any partitioning that is needed
1144
+ // within the compaction range.
1145
+ //
1146
+ // But if there's no partitioning boundary within the compaction
1147
+ // range, we can be sure there's no need to fix partitioning
1148
+ // within that range, thus safe to check overlap within file.
1149
+ //
1150
+ // Use a hypothetical trivial move query to check for partition
1151
+ // boundary in range. (NOTE: in defiance of all conventions,
1152
+ // `begin` and `end` here are both INCLUSIVE bounds, which makes
1153
+ // this analogy to CanDoTrivialMove() accurate even when `end` is
1154
+ // the first key in a partition.)
1155
+ if (!partitioner->CanDoTrivialMove(*begin, *end)) {
1156
+ check_overlap_within_file = false;
1157
+ }
1158
+ }
1159
+ }
1160
+ if (check_overlap_within_file) {
1098
1161
  Status status = current_version->OverlapWithLevelIterator(
1099
1162
  ro, file_options_, *begin, *end, level, &overlap);
1100
1163
  if (!status.ok()) {
1101
- overlap = current_version->storage_info()->OverlapInLevel(
1102
- level, begin, end);
1164
+ check_overlap_within_file = false;
1103
1165
  }
1104
- } else {
1166
+ }
1167
+ if (!check_overlap_within_file) {
1105
1168
  overlap = current_version->storage_info()->OverlapInLevel(level,
1106
1169
  begin, end);
1107
1170
  }
@@ -1197,6 +1260,12 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
1197
1260
 
1198
1261
  ROCKS_LOG_INFO(immutable_db_options_.info_log,
1199
1262
  "[RefitLevel] waiting for background threads to stop");
1263
+ // TODO(hx235): remove `Enable/DisableManualCompaction` and
1264
+ // `Continue/PauseBackgroundWork` once we ensure registering RefitLevel()'s
1265
+ // range is sufficient (if not, what else is needed) for avoiding range
1266
+ // conflicts with other activities (e.g, compaction, flush) that are
1267
+ // currently avoided by `Enable/DisableManualCompaction` and
1268
+ // `Continue/PauseBackgroundWork`.
1200
1269
  DisableManualCompaction();
1201
1270
  s = PauseBackgroundWork();
1202
1271
  if (s.ok()) {
@@ -1261,13 +1330,6 @@ Status DBImpl::CompactFiles(const CompactionOptions& compact_options,
1261
1330
  const_cast<std::atomic<int>*>(&manual_compaction_paused_)));
1262
1331
  {
1263
1332
  InstrumentedMutexLock l(&mutex_);
1264
-
1265
- // This call will unlock/lock the mutex to wait for current running
1266
- // IngestExternalFile() calls to finish.
1267
- WaitForIngestFile();
1268
-
1269
- // We need to get current after `WaitForIngestFile`, because
1270
- // `IngestExternalFile` may add files that overlap with `input_file_names`
1271
1333
  auto* current = cfd->current();
1272
1334
  current->Ref();
1273
1335
 
@@ -1346,6 +1408,7 @@ Status DBImpl::CompactFilesImpl(
1346
1408
 
1347
1409
  Status s = cfd->compaction_picker()->SanitizeCompactionInputFiles(
1348
1410
  &input_set, cf_meta, output_level);
1411
+ TEST_SYNC_POINT("DBImpl::CompactFilesImpl::PostSanitizeCompactionInputFiles");
1349
1412
  if (!s.ok()) {
1350
1413
  return s;
1351
1414
  }
@@ -1639,6 +1702,10 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
1639
1702
 
1640
1703
  InstrumentedMutexLock guard_lock(&mutex_);
1641
1704
 
1705
+ auto* vstorage = cfd->current()->storage_info();
1706
+ if (vstorage->LevelFiles(level).empty()) {
1707
+ return Status::OK();
1708
+ }
1642
1709
  // only allow one thread refitting
1643
1710
  if (refitting_level_) {
1644
1711
  ROCKS_LOG_INFO(immutable_db_options_.info_log,
@@ -1654,8 +1721,16 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
1654
1721
  to_level = FindMinimumEmptyLevelFitting(cfd, mutable_cf_options, level);
1655
1722
  }
1656
1723
 
1657
- auto* vstorage = cfd->current()->storage_info();
1658
1724
  if (to_level != level) {
1725
+ std::vector<CompactionInputFiles> input(1);
1726
+ input[0].level = level;
1727
+ for (auto& f : vstorage->LevelFiles(level)) {
1728
+ input[0].files.push_back(f);
1729
+ }
1730
+ InternalKey refit_level_smallest;
1731
+ InternalKey refit_level_largest;
1732
+ cfd->compaction_picker()->GetRange(input[0], &refit_level_smallest,
1733
+ &refit_level_largest);
1659
1734
  if (to_level > level) {
1660
1735
  if (level == 0) {
1661
1736
  refitting_level_ = false;
@@ -1669,6 +1744,14 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
1669
1744
  return Status::NotSupported(
1670
1745
  "Levels between source and target are not empty for a move.");
1671
1746
  }
1747
+ if (cfd->RangeOverlapWithCompaction(refit_level_smallest.user_key(),
1748
+ refit_level_largest.user_key(),
1749
+ l)) {
1750
+ refitting_level_ = false;
1751
+ return Status::NotSupported(
1752
+ "Levels between source and target "
1753
+ "will have some ongoing compaction's output.");
1754
+ }
1672
1755
  }
1673
1756
  } else {
1674
1757
  // to_level < level
@@ -1679,22 +1762,51 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
1679
1762
  return Status::NotSupported(
1680
1763
  "Levels between source and target are not empty for a move.");
1681
1764
  }
1765
+ if (cfd->RangeOverlapWithCompaction(refit_level_smallest.user_key(),
1766
+ refit_level_largest.user_key(),
1767
+ l)) {
1768
+ refitting_level_ = false;
1769
+ return Status::NotSupported(
1770
+ "Levels between source and target "
1771
+ "will have some ongoing compaction's output.");
1772
+ }
1682
1773
  }
1683
1774
  }
1684
1775
  ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
1685
1776
  "[%s] Before refitting:\n%s", cfd->GetName().c_str(),
1686
1777
  cfd->current()->DebugString().data());
1687
1778
 
1779
+ std::unique_ptr<Compaction> c(new Compaction(
1780
+ vstorage, *cfd->ioptions(), mutable_cf_options, mutable_db_options_,
1781
+ {input}, to_level,
1782
+ MaxFileSizeForLevel(
1783
+ mutable_cf_options, to_level,
1784
+ cfd->ioptions()
1785
+ ->compaction_style) /* output file size limit, not applicable */
1786
+ ,
1787
+ LLONG_MAX /* max compaction bytes, not applicable */,
1788
+ 0 /* output path ID, not applicable */, mutable_cf_options.compression,
1789
+ mutable_cf_options.compression_opts, Temperature::kUnknown,
1790
+ 0 /* max_subcompactions, not applicable */,
1791
+ {} /* grandparents, not applicable */, false /* is manual */,
1792
+ "" /* trim_ts */, -1 /* score, not applicable */,
1793
+ false /* is deletion compaction, not applicable */,
1794
+ false /* l0_files_might_overlap, not applicable */,
1795
+ CompactionReason::kRefitLevel));
1796
+ cfd->compaction_picker()->RegisterCompaction(c.get());
1797
+ TEST_SYNC_POINT("DBImpl::ReFitLevel:PostRegisterCompaction");
1688
1798
  VersionEdit edit;
1689
1799
  edit.SetColumnFamily(cfd->GetID());
1800
+
1690
1801
  for (const auto& f : vstorage->LevelFiles(level)) {
1691
1802
  edit.DeleteFile(level, f->fd.GetNumber());
1692
1803
  edit.AddFile(
1693
1804
  to_level, f->fd.GetNumber(), f->fd.GetPathId(), f->fd.GetFileSize(),
1694
1805
  f->smallest, f->largest, f->fd.smallest_seqno, f->fd.largest_seqno,
1695
1806
  f->marked_for_compaction, f->temperature, f->oldest_blob_file_number,
1696
- f->oldest_ancester_time, f->file_creation_time, f->file_checksum,
1697
- f->file_checksum_func_name, f->unique_id);
1807
+ f->oldest_ancester_time, f->file_creation_time, f->epoch_number,
1808
+ f->file_checksum, f->file_checksum_func_name, f->unique_id,
1809
+ f->compensated_range_deletion_size);
1698
1810
  }
1699
1811
  ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
1700
1812
  "[%s] Apply version edit:\n%s", cfd->GetName().c_str(),
@@ -1703,6 +1815,9 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
1703
1815
  Status status = versions_->LogAndApply(cfd, mutable_cf_options, &edit,
1704
1816
  &mutex_, directories_.GetDbDir());
1705
1817
 
1818
+ cfd->compaction_picker()->UnregisterCompaction(c.get());
1819
+ c.reset();
1820
+
1706
1821
  InstallSuperVersionAndScheduleWork(cfd, &sv_context, mutable_cf_options);
1707
1822
 
1708
1823
  ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[%s] LogAndApply: %s\n",
@@ -1919,11 +2034,19 @@ Status DBImpl::RunManualCompaction(
1919
2034
  manual.begin, manual.end, &manual.manual_end, &manual_conflict,
1920
2035
  max_file_num_to_ignore, trim_ts)) == nullptr &&
1921
2036
  manual_conflict))) {
1922
- // exclusive manual compactions should not see a conflict during
1923
- // CompactRange
1924
- assert(!exclusive || !manual_conflict);
1925
- // Running either this or some other manual compaction
1926
- bg_cv_.Wait();
2037
+ if (!scheduled) {
2038
+ // There is a conflicting compaction
2039
+ if (manual_compaction_paused_ > 0 || manual.canceled == true) {
2040
+ // Stop waiting since it was canceled. Pretend the error came from
2041
+ // compaction so the below cleanup/error handling code can process it.
2042
+ manual.done = true;
2043
+ manual.status =
2044
+ Status::Incomplete(Status::SubCode::kManualCompactionPaused);
2045
+ }
2046
+ }
2047
+ if (!manual.done) {
2048
+ bg_cv_.Wait();
2049
+ }
1927
2050
  if (manual_compaction_paused_ > 0 && scheduled && !unscheduled) {
1928
2051
  assert(thread_pool_priority != Env::Priority::TOTAL);
1929
2052
  // unschedule all manual compactions
@@ -2001,16 +2124,17 @@ Status DBImpl::RunManualCompaction(
2001
2124
  }
2002
2125
 
2003
2126
  void DBImpl::GenerateFlushRequest(const autovector<ColumnFamilyData*>& cfds,
2004
- FlushRequest* req) {
2127
+ FlushReason flush_reason, FlushRequest* req) {
2005
2128
  assert(req != nullptr);
2006
- req->reserve(cfds.size());
2129
+ req->flush_reason = flush_reason;
2130
+ req->cfd_to_max_mem_id_to_persist.reserve(cfds.size());
2007
2131
  for (const auto cfd : cfds) {
2008
2132
  if (nullptr == cfd) {
2009
2133
  // cfd may be null, see DBImpl::ScheduleFlushes
2010
2134
  continue;
2011
2135
  }
2012
2136
  uint64_t max_memtable_id = cfd->imm()->GetLatestMemTableID();
2013
- req->emplace_back(cfd, max_memtable_id);
2137
+ req->cfd_to_max_mem_id_to_persist.emplace(cfd, max_memtable_id);
2014
2138
  }
2015
2139
  }
2016
2140
 
@@ -2068,7 +2192,7 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
2068
2192
  if (s.ok()) {
2069
2193
  if (cfd->imm()->NumNotFlushed() != 0 || !cfd->mem()->IsEmpty() ||
2070
2194
  !cached_recoverable_state_empty_.load()) {
2071
- FlushRequest req{{cfd, flush_memtable_id}};
2195
+ FlushRequest req{flush_reason, {{cfd, flush_memtable_id}}};
2072
2196
  flush_reqs.emplace_back(std::move(req));
2073
2197
  memtable_ids_to_wait.emplace_back(cfd->imm()->GetLatestMemTableID());
2074
2198
  }
@@ -2096,10 +2220,10 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
2096
2220
  "to avoid holding old logs",
2097
2221
  cfd->GetName().c_str());
2098
2222
  s = SwitchMemtable(cfd_stats, &context);
2099
- FlushRequest req{{cfd_stats, flush_memtable_id}};
2223
+ FlushRequest req{flush_reason, {{cfd_stats, flush_memtable_id}}};
2100
2224
  flush_reqs.emplace_back(std::move(req));
2101
2225
  memtable_ids_to_wait.emplace_back(
2102
- cfd->imm()->GetLatestMemTableID());
2226
+ cfd_stats->imm()->GetLatestMemTableID());
2103
2227
  }
2104
2228
  }
2105
2229
  }
@@ -2107,8 +2231,9 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
2107
2231
 
2108
2232
  if (s.ok() && !flush_reqs.empty()) {
2109
2233
  for (const auto& req : flush_reqs) {
2110
- assert(req.size() == 1);
2111
- ColumnFamilyData* loop_cfd = req[0].first;
2234
+ assert(req.cfd_to_max_mem_id_to_persist.size() == 1);
2235
+ ColumnFamilyData* loop_cfd =
2236
+ req.cfd_to_max_mem_id_to_persist.begin()->first;
2112
2237
  loop_cfd->imm()->FlushRequested();
2113
2238
  }
2114
2239
  // If the caller wants to wait for this flush to complete, it indicates
@@ -2117,13 +2242,14 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
2117
2242
  // Therefore, we increase the cfd's ref count.
2118
2243
  if (flush_options.wait) {
2119
2244
  for (const auto& req : flush_reqs) {
2120
- assert(req.size() == 1);
2121
- ColumnFamilyData* loop_cfd = req[0].first;
2245
+ assert(req.cfd_to_max_mem_id_to_persist.size() == 1);
2246
+ ColumnFamilyData* loop_cfd =
2247
+ req.cfd_to_max_mem_id_to_persist.begin()->first;
2122
2248
  loop_cfd->Ref();
2123
2249
  }
2124
2250
  }
2125
2251
  for (const auto& req : flush_reqs) {
2126
- SchedulePendingFlush(req, flush_reason);
2252
+ SchedulePendingFlush(req);
2127
2253
  }
2128
2254
  MaybeScheduleFlushOrCompaction();
2129
2255
  }
@@ -2142,8 +2268,8 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
2142
2268
  autovector<const uint64_t*> flush_memtable_ids;
2143
2269
  assert(flush_reqs.size() == memtable_ids_to_wait.size());
2144
2270
  for (size_t i = 0; i < flush_reqs.size(); ++i) {
2145
- assert(flush_reqs[i].size() == 1);
2146
- cfds.push_back(flush_reqs[i][0].first);
2271
+ assert(flush_reqs[i].cfd_to_max_mem_id_to_persist.size() == 1);
2272
+ cfds.push_back(flush_reqs[i].cfd_to_max_mem_id_to_persist.begin()->first);
2147
2273
  flush_memtable_ids.push_back(&(memtable_ids_to_wait[i]));
2148
2274
  }
2149
2275
  s = WaitForFlushMemTables(
@@ -2240,8 +2366,8 @@ Status DBImpl::AtomicFlushMemTables(
2240
2366
  cfd->Ref();
2241
2367
  }
2242
2368
  }
2243
- GenerateFlushRequest(cfds, &flush_req);
2244
- SchedulePendingFlush(flush_req, flush_reason);
2369
+ GenerateFlushRequest(cfds, flush_reason, &flush_req);
2370
+ SchedulePendingFlush(flush_req);
2245
2371
  MaybeScheduleFlushOrCompaction();
2246
2372
  }
2247
2373
 
@@ -2256,7 +2382,7 @@ Status DBImpl::AtomicFlushMemTables(
2256
2382
  TEST_SYNC_POINT("DBImpl::AtomicFlushMemTables:BeforeWaitForBgFlush");
2257
2383
  if (s.ok() && flush_options.wait) {
2258
2384
  autovector<const uint64_t*> flush_memtable_ids;
2259
- for (auto& iter : flush_req) {
2385
+ for (auto& iter : flush_req.cfd_to_max_mem_id_to_persist) {
2260
2386
  flush_memtable_ids.push_back(&(iter.second));
2261
2387
  }
2262
2388
  s = WaitForFlushMemTables(
@@ -2603,9 +2729,9 @@ DBImpl::FlushRequest DBImpl::PopFirstFromFlushQueue() {
2603
2729
  FlushRequest flush_req = flush_queue_.front();
2604
2730
  flush_queue_.pop_front();
2605
2731
  if (!immutable_db_options_.atomic_flush) {
2606
- assert(flush_req.size() == 1);
2732
+ assert(flush_req.cfd_to_max_mem_id_to_persist.size() == 1);
2607
2733
  }
2608
- for (const auto& elem : flush_req) {
2734
+ for (const auto& elem : flush_req.cfd_to_max_mem_id_to_persist) {
2609
2735
  if (!immutable_db_options_.atomic_flush) {
2610
2736
  ColumnFamilyData* cfd = elem.first;
2611
2737
  assert(cfd);
@@ -2613,7 +2739,6 @@ DBImpl::FlushRequest DBImpl::PopFirstFromFlushQueue() {
2613
2739
  cfd->set_queued_for_flush(false);
2614
2740
  }
2615
2741
  }
2616
- // TODO: need to unset flush reason?
2617
2742
  return flush_req;
2618
2743
  }
2619
2744
 
@@ -2643,31 +2768,29 @@ ColumnFamilyData* DBImpl::PickCompactionFromQueue(
2643
2768
  return cfd;
2644
2769
  }
2645
2770
 
2646
- void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req,
2647
- FlushReason flush_reason) {
2771
+ void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req) {
2648
2772
  mutex_.AssertHeld();
2649
- if (flush_req.empty()) {
2773
+ if (flush_req.cfd_to_max_mem_id_to_persist.empty()) {
2650
2774
  return;
2651
2775
  }
2652
2776
  if (!immutable_db_options_.atomic_flush) {
2653
2777
  // For the non-atomic flush case, we never schedule multiple column
2654
2778
  // families in the same flush request.
2655
- assert(flush_req.size() == 1);
2656
- ColumnFamilyData* cfd = flush_req[0].first;
2779
+ assert(flush_req.cfd_to_max_mem_id_to_persist.size() == 1);
2780
+ ColumnFamilyData* cfd =
2781
+ flush_req.cfd_to_max_mem_id_to_persist.begin()->first;
2657
2782
  assert(cfd);
2658
2783
 
2659
2784
  if (!cfd->queued_for_flush() && cfd->imm()->IsFlushPending()) {
2660
2785
  cfd->Ref();
2661
2786
  cfd->set_queued_for_flush(true);
2662
- cfd->SetFlushReason(flush_reason);
2663
2787
  ++unscheduled_flushes_;
2664
2788
  flush_queue_.push_back(flush_req);
2665
2789
  }
2666
2790
  } else {
2667
- for (auto& iter : flush_req) {
2791
+ for (auto& iter : flush_req.cfd_to_max_mem_id_to_persist) {
2668
2792
  ColumnFamilyData* cfd = iter.first;
2669
2793
  cfd->Ref();
2670
- cfd->SetFlushReason(flush_reason);
2671
2794
  }
2672
2795
  ++unscheduled_flushes_;
2673
2796
  flush_queue_.push_back(flush_req);
@@ -2799,10 +2922,12 @@ Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context,
2799
2922
  while (!flush_queue_.empty()) {
2800
2923
  // This cfd is already referenced
2801
2924
  const FlushRequest& flush_req = PopFirstFromFlushQueue();
2925
+ FlushReason flush_reason = flush_req.flush_reason;
2802
2926
  superversion_contexts.clear();
2803
- superversion_contexts.reserve(flush_req.size());
2927
+ superversion_contexts.reserve(
2928
+ flush_req.cfd_to_max_mem_id_to_persist.size());
2804
2929
 
2805
- for (const auto& iter : flush_req) {
2930
+ for (const auto& iter : flush_req.cfd_to_max_mem_id_to_persist) {
2806
2931
  ColumnFamilyData* cfd = iter.first;
2807
2932
  if (cfd->GetMempurgeUsed()) {
2808
2933
  // If imm() contains silent memtables (e.g.: because
@@ -2818,7 +2943,7 @@ Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context,
2818
2943
  }
2819
2944
  superversion_contexts.emplace_back(SuperVersionContext(true));
2820
2945
  bg_flush_args.emplace_back(cfd, iter.second,
2821
- &(superversion_contexts.back()));
2946
+ &(superversion_contexts.back()), flush_reason);
2822
2947
  }
2823
2948
  if (!bg_flush_args.empty()) {
2824
2949
  break;
@@ -2842,9 +2967,14 @@ Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context,
2842
2967
  status = FlushMemTablesToOutputFiles(bg_flush_args, made_progress,
2843
2968
  job_context, log_buffer, thread_pri);
2844
2969
  TEST_SYNC_POINT("DBImpl::BackgroundFlush:BeforeFlush");
2845
- // All the CFDs in the FlushReq must have the same flush reason, so just
2846
- // grab the first one
2847
- *reason = bg_flush_args[0].cfd_->GetFlushReason();
2970
+ // All the CFD/bg_flush_arg in the FlushReq must have the same flush reason, so
2971
+ // just grab the first one
2972
+ #ifndef NDEBUG
2973
+ for (const auto bg_flush_arg : bg_flush_args) {
2974
+ assert(bg_flush_arg.flush_reason_ == bg_flush_args[0].flush_reason_);
2975
+ }
2976
+ #endif /* !NDEBUG */
2977
+ *reason = bg_flush_args[0].flush_reason_;
2848
2978
  for (auto& arg : bg_flush_args) {
2849
2979
  ColumnFamilyData* cfd = arg.cfd_;
2850
2980
  if (cfd->UnrefAndTryDelete()) {
@@ -2950,10 +3080,6 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
2950
3080
  {
2951
3081
  InstrumentedMutexLock l(&mutex_);
2952
3082
 
2953
- // This call will unlock/lock the mutex to wait for current running
2954
- // IngestExternalFile() calls to finish.
2955
- WaitForIngestFile();
2956
-
2957
3083
  num_running_compactions_++;
2958
3084
 
2959
3085
  std::unique_ptr<std::list<uint64_t>::iterator>
@@ -3334,8 +3460,9 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
3334
3460
  f->fd.GetFileSize(), f->smallest, f->largest, f->fd.smallest_seqno,
3335
3461
  f->fd.largest_seqno, f->marked_for_compaction, f->temperature,
3336
3462
  f->oldest_blob_file_number, f->oldest_ancester_time,
3337
- f->file_creation_time, f->file_checksum, f->file_checksum_func_name,
3338
- f->unique_id);
3463
+ f->file_creation_time, f->epoch_number, f->file_checksum,
3464
+ f->file_checksum_func_name, f->unique_id,
3465
+ f->compensated_range_deletion_size);
3339
3466
 
3340
3467
  ROCKS_LOG_BUFFER(
3341
3468
  log_buffer,
@@ -3594,11 +3721,6 @@ void DBImpl::RemoveManualCompaction(DBImpl::ManualCompactionState* m) {
3594
3721
  }
3595
3722
 
3596
3723
  bool DBImpl::ShouldntRunManualCompaction(ManualCompactionState* m) {
3597
- if (num_running_ingest_file_ > 0) {
3598
- // We need to wait for other IngestExternalFile() calls to finish
3599
- // before running a manual compaction.
3600
- return true;
3601
- }
3602
3724
  if (m->exclusive) {
3603
3725
  return (bg_bottom_compaction_scheduled_ > 0 ||
3604
3726
  bg_compaction_scheduled_ > 0);
@@ -290,7 +290,7 @@ size_t DBImpl::TEST_GetWalPreallocateBlockSize(
290
290
  }
291
291
 
292
292
  #ifndef ROCKSDB_LITE
293
- void DBImpl::TEST_WaitForPeridicTaskRun(std::function<void()> callback) const {
293
+ void DBImpl::TEST_WaitForPeriodicTaskRun(std::function<void()> callback) const {
294
294
  periodic_task_scheduler_.TEST_WaitForRun(callback);
295
295
  }
296
296
 
@@ -136,8 +136,9 @@ Status DBImpl::PromoteL0(ColumnFamilyHandle* column_family, int target_level) {
136
136
  f->fd.smallest_seqno, f->fd.largest_seqno,
137
137
  f->marked_for_compaction, f->temperature,
138
138
  f->oldest_blob_file_number, f->oldest_ancester_time,
139
- f->file_creation_time, f->file_checksum,
140
- f->file_checksum_func_name, f->unique_id);
139
+ f->file_creation_time, f->epoch_number, f->file_checksum,
140
+ f->file_checksum_func_name, f->unique_id,
141
+ f->compensated_range_deletion_size);
141
142
  }
142
143
 
143
144
  status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
@@ -315,6 +315,7 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
315
315
  }
316
316
  log_write_mutex_.Unlock();
317
317
  mutex_.Unlock();
318
+ TEST_SYNC_POINT_CALLBACK("FindObsoleteFiles::PostMutexUnlock", nullptr);
318
319
  log_write_mutex_.Lock();
319
320
  while (!logs_.empty() && logs_.front().number < min_log_number) {
320
321
  auto& log = logs_.front();
@@ -360,6 +361,8 @@ void DBImpl::DeleteObsoleteFileImpl(int job_id, const std::string& fname,
360
361
  }
361
362
  TEST_SYNC_POINT_CALLBACK("DBImpl::DeleteObsoleteFileImpl:AfterDeletion",
362
363
  &file_deletion_status);
364
+ TEST_SYNC_POINT_CALLBACK("DBImpl::DeleteObsoleteFileImpl:AfterDeletion2",
365
+ const_cast<std::string*>(&fname));
363
366
  if (file_deletion_status.ok()) {
364
367
  ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
365
368
  "[JOB %d] Delete %s type=%d #%" PRIu64 " -- %s\n", job_id,