@nxtedition/rocksdb 8.1.4 → 8.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223) hide show
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +21 -0
  2. package/deps/rocksdb/rocksdb/Makefile +15 -3
  3. package/deps/rocksdb/rocksdb/TARGETS +6 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +32 -35
  5. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +0 -30
  6. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.h +0 -83
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +13 -14
  8. package/deps/rocksdb/rocksdb/cache/cache_helpers.cc +40 -0
  9. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +14 -20
  10. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +8 -9
  11. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +5 -4
  12. package/deps/rocksdb/rocksdb/cache/cache_test.cc +124 -156
  13. package/deps/rocksdb/rocksdb/cache/charged_cache.cc +10 -26
  14. package/deps/rocksdb/rocksdb/cache/charged_cache.h +11 -16
  15. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +35 -32
  16. package/deps/rocksdb/rocksdb/cache/clock_cache.h +19 -21
  17. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +42 -30
  18. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +9 -8
  19. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +91 -143
  20. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +54 -60
  21. package/deps/rocksdb/rocksdb/cache/lru_cache.h +37 -63
  22. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +120 -106
  23. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +14 -5
  24. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +16 -31
  25. package/deps/rocksdb/rocksdb/cache/typed_cache.h +339 -0
  26. package/deps/rocksdb/rocksdb/db/blob/blob_contents.cc +0 -48
  27. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +18 -15
  28. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +5 -26
  29. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +7 -8
  30. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +6 -3
  31. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +2 -7
  32. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +19 -47
  33. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +13 -5
  34. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +15 -22
  35. package/deps/rocksdb/rocksdb/db/builder.cc +24 -10
  36. package/deps/rocksdb/rocksdb/db/builder.h +2 -1
  37. package/deps/rocksdb/rocksdb/db/c.cc +15 -0
  38. package/deps/rocksdb/rocksdb/db/c_test.c +3 -0
  39. package/deps/rocksdb/rocksdb/db/column_family.cc +11 -6
  40. package/deps/rocksdb/rocksdb/db/column_family.h +20 -6
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +31 -34
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +3 -0
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +21 -3
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +1 -0
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +4 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +4 -2
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +9 -6
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +275 -82
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -18
  51. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +17 -16
  52. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +19 -6
  53. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +5 -5
  54. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +22 -22
  55. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +5 -5
  56. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +81 -52
  57. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -1
  58. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +5 -5
  59. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +8 -2
  60. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +3 -0
  61. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +266 -138
  62. package/deps/rocksdb/rocksdb/db/corruption_test.cc +86 -1
  63. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +98 -9
  64. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +28 -28
  65. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +2 -3
  66. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +1022 -123
  67. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +65 -4
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +32 -21
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +32 -24
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +199 -77
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +1 -1
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +3 -2
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -0
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +8 -4
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +43 -23
  76. package/deps/rocksdb/rocksdb/db/db_iter.cc +8 -2
  77. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +42 -0
  78. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +155 -0
  79. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +12 -12
  80. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +230 -2
  81. package/deps/rocksdb/rocksdb/db/db_test.cc +3 -0
  82. package/deps/rocksdb/rocksdb/db/db_test2.cc +233 -8
  83. package/deps/rocksdb/rocksdb/db/db_test_util.cc +11 -10
  84. package/deps/rocksdb/rocksdb/db/db_test_util.h +39 -24
  85. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +129 -0
  86. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +28 -0
  87. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +21 -0
  88. package/deps/rocksdb/rocksdb/db/dbformat.cc +25 -0
  89. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -0
  90. package/deps/rocksdb/rocksdb/db/experimental.cc +3 -2
  91. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +3 -0
  92. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +92 -13
  93. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +38 -1
  94. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +14 -110
  95. package/deps/rocksdb/rocksdb/db/flush_job.cc +12 -10
  96. package/deps/rocksdb/rocksdb/db/flush_job.h +3 -2
  97. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +29 -29
  98. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +56 -53
  99. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +3 -4
  100. package/deps/rocksdb/rocksdb/db/internal_stats.cc +11 -11
  101. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -2
  102. package/deps/rocksdb/rocksdb/db/log_reader.cc +8 -6
  103. package/deps/rocksdb/rocksdb/db/log_test.cc +35 -2
  104. package/deps/rocksdb/rocksdb/db/memtable.cc +31 -6
  105. package/deps/rocksdb/rocksdb/db/merge_helper.cc +47 -29
  106. package/deps/rocksdb/rocksdb/db/merge_helper.h +14 -6
  107. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +10 -10
  108. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/repair.cc +65 -22
  110. package/deps/rocksdb/rocksdb/db/repair_test.cc +54 -0
  111. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +26 -26
  112. package/deps/rocksdb/rocksdb/db/table_cache.cc +41 -91
  113. package/deps/rocksdb/rocksdb/db/table_cache.h +17 -19
  114. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -9
  115. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -1
  116. package/deps/rocksdb/rocksdb/db/version_builder.cc +102 -52
  117. package/deps/rocksdb/rocksdb/db/version_builder.h +20 -0
  118. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +218 -93
  119. package/deps/rocksdb/rocksdb/db/version_edit.cc +27 -1
  120. package/deps/rocksdb/rocksdb/db/version_edit.h +34 -9
  121. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +13 -6
  122. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +17 -6
  123. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +19 -17
  124. package/deps/rocksdb/rocksdb/db/version_set.cc +160 -28
  125. package/deps/rocksdb/rocksdb/db/version_set.h +34 -4
  126. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -1
  127. package/deps/rocksdb/rocksdb/db/version_set_test.cc +65 -31
  128. package/deps/rocksdb/rocksdb/db/write_batch.cc +4 -1
  129. package/deps/rocksdb/rocksdb/db/write_thread.cc +5 -2
  130. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -0
  131. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -32
  132. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.h +2 -1
  133. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +8 -6
  134. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
  135. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +11 -4
  136. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +16 -15
  137. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +13 -1
  138. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -0
  139. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +286 -217
  140. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +8 -0
  141. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +137 -135
  142. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
  143. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +7 -1
  144. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +21 -0
  145. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +9 -3
  146. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +2 -1
  147. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +8 -6
  148. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +3 -0
  149. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +69 -9
  150. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  151. package/deps/rocksdb/rocksdb/memory/arena.cc +23 -87
  152. package/deps/rocksdb/rocksdb/memory/arena.h +25 -31
  153. package/deps/rocksdb/rocksdb/memory/arena_test.cc +90 -0
  154. package/deps/rocksdb/rocksdb/memory/memory_allocator.h +9 -0
  155. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +26 -26
  156. package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -3
  157. package/deps/rocksdb/rocksdb/port/mmap.cc +98 -0
  158. package/deps/rocksdb/rocksdb/port/mmap.h +70 -0
  159. package/deps/rocksdb/rocksdb/port/port_posix.h +2 -0
  160. package/{prebuilds → deps/rocksdb/rocksdb/prebuilds}/linux-x64/node.napi.node +0 -0
  161. package/deps/rocksdb/rocksdb/src.mk +3 -0
  162. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -2
  163. package/deps/rocksdb/rocksdb/table/block_based/block.h +3 -0
  164. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +25 -67
  165. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +3 -3
  166. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +18 -13
  167. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +159 -225
  168. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +31 -50
  169. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +52 -20
  170. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +3 -3
  171. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +1 -1
  172. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +96 -0
  173. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +132 -0
  174. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +28 -0
  175. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -5
  176. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +1 -4
  177. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +6 -7
  178. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +3 -1
  179. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +6 -1
  180. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +19 -18
  181. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +9 -5
  182. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +3 -1
  183. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +2 -1
  184. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +2 -2
  185. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -3
  186. package/deps/rocksdb/rocksdb/table/format.cc +24 -20
  187. package/deps/rocksdb/rocksdb/table/format.h +6 -3
  188. package/deps/rocksdb/rocksdb/table/get_context.cc +12 -3
  189. package/deps/rocksdb/rocksdb/table/internal_iterator.h +0 -2
  190. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +69 -35
  191. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +2 -2
  192. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
  193. package/deps/rocksdb/rocksdb/table/table_test.cc +7 -6
  194. package/deps/rocksdb/rocksdb/test_util/testutil.h +10 -0
  195. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +66 -1
  196. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +9 -2
  197. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +5 -0
  198. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +2 -2
  199. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +1 -1
  200. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +20 -12
  201. package/deps/rocksdb/rocksdb/util/bloom_test.cc +1 -1
  202. package/deps/rocksdb/rocksdb/util/compression.cc +2 -2
  203. package/deps/rocksdb/rocksdb/util/compression.h +11 -2
  204. package/deps/rocksdb/rocksdb/util/status.cc +7 -0
  205. package/deps/rocksdb/rocksdb/util/xxhash.h +1901 -887
  206. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +250 -74
  207. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +199 -4
  208. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +35 -57
  209. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +4 -5
  210. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -0
  211. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +39 -0
  212. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +9 -0
  213. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +11 -6
  214. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +6 -5
  215. package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +0 -1
  216. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +10 -11
  217. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +31 -31
  218. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +4 -0
  219. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +111 -0
  220. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +1 -0
  221. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +12 -3
  222. package/package.json +1 -1
  223. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +0 -182
@@ -569,12 +569,7 @@ Status BlobFileReader::UncompressBlobIfNeeded(
569
569
  assert(result);
570
570
 
571
571
  if (compression_type == kNoCompression) {
572
- CacheAllocationPtr allocation =
573
- AllocateBlock(value_slice.size(), allocator);
574
- memcpy(allocation.get(), value_slice.data(), value_slice.size());
575
-
576
- *result = BlobContents::Create(std::move(allocation), value_slice.size());
577
-
572
+ BlobContentsCreator::Create(result, nullptr, value_slice, allocator);
578
573
  return Status::OK();
579
574
  }
580
575
 
@@ -602,7 +597,7 @@ Status BlobFileReader::UncompressBlobIfNeeded(
602
597
  return Status::Corruption("Unable to uncompress blob");
603
598
  }
604
599
 
605
- *result = BlobContents::Create(std::move(output), uncompressed_size);
600
+ result->reset(new BlobContents(std::move(output), uncompressed_size));
606
601
 
607
602
  return Status::OK();
608
603
  }
@@ -36,8 +36,8 @@ BlobSource::BlobSource(const ImmutableOptions* immutable_options,
36
36
  if (bbto &&
37
37
  bbto->cache_usage_options.options_overrides.at(CacheEntryRole::kBlobCache)
38
38
  .charged == CacheEntryRoleOptions::Decision::kEnabled) {
39
- blob_cache_ = std::make_shared<ChargedCache>(immutable_options->blob_cache,
40
- bbto->block_cache);
39
+ blob_cache_ = SharedCacheInterface{std::make_shared<ChargedCache>(
40
+ immutable_options->blob_cache, bbto->block_cache)};
41
41
  }
42
42
  #endif // ROCKSDB_LITE
43
43
  }
@@ -82,9 +82,8 @@ Status BlobSource::PutBlobIntoCache(
82
82
  assert(cached_blob);
83
83
  assert(cached_blob->IsEmpty());
84
84
 
85
- Cache::Handle* cache_handle = nullptr;
85
+ TypedHandle* cache_handle = nullptr;
86
86
  const Status s = InsertEntryIntoCache(cache_key, blob->get(),
87
- (*blob)->ApproximateMemoryUsage(),
88
87
  &cache_handle, Cache::Priority::BOTTOM);
89
88
  if (s.ok()) {
90
89
  blob->release();
@@ -106,26 +105,10 @@ Status BlobSource::PutBlobIntoCache(
106
105
  return s;
107
106
  }
108
107
 
109
- Cache::Handle* BlobSource::GetEntryFromCache(const Slice& key) const {
110
- Cache::Handle* cache_handle = nullptr;
111
-
112
- if (lowest_used_cache_tier_ == CacheTier::kNonVolatileBlockTier) {
113
- Cache::CreateCallback create_cb =
114
- [allocator = blob_cache_->memory_allocator()](
115
- const void* buf, size_t size, void** out_obj,
116
- size_t* charge) -> Status {
117
- return BlobContents::CreateCallback(AllocateBlock(size, allocator), buf,
118
- size, out_obj, charge);
119
- };
120
-
121
- cache_handle = blob_cache_->Lookup(key, BlobContents::GetCacheItemHelper(),
122
- create_cb, Cache::Priority::BOTTOM,
123
- true /* wait_for_cache */, statistics_);
124
- } else {
125
- cache_handle = blob_cache_->Lookup(key, statistics_);
126
- }
127
-
128
- return cache_handle;
108
+ BlobSource::TypedHandle* BlobSource::GetEntryFromCache(const Slice& key) const {
109
+ return blob_cache_.LookupFull(
110
+ key, nullptr /* context */, Cache::Priority::BOTTOM,
111
+ true /* wait_for_cache */, statistics_, lowest_used_cache_tier_);
129
112
  }
130
113
 
131
114
  void BlobSource::PinCachedBlob(CacheHandleGuard<BlobContents>* cached_blob,
@@ -166,24 +149,11 @@ void BlobSource::PinOwnedBlob(std::unique_ptr<BlobContents>* owned_blob,
166
149
  }
167
150
 
168
151
  Status BlobSource::InsertEntryIntoCache(const Slice& key, BlobContents* value,
169
- size_t charge,
170
- Cache::Handle** cache_handle,
152
+ TypedHandle** cache_handle,
171
153
  Cache::Priority priority) const {
172
- Status s;
173
-
174
- Cache::CacheItemHelper* const cache_item_helper =
175
- BlobContents::GetCacheItemHelper();
176
- assert(cache_item_helper);
177
-
178
- if (lowest_used_cache_tier_ == CacheTier::kNonVolatileBlockTier) {
179
- s = blob_cache_->Insert(key, value, cache_item_helper, charge, cache_handle,
180
- priority);
181
- } else {
182
- s = blob_cache_->Insert(key, value, charge, cache_item_helper->del_cb,
183
- cache_handle, priority);
184
- }
185
-
186
- return s;
154
+ return blob_cache_.InsertFull(key, value, value->ApproximateMemoryUsage(),
155
+ cache_handle, priority,
156
+ lowest_used_cache_tier_);
187
157
  }
188
158
 
189
159
  Status BlobSource::GetBlob(const ReadOptions& read_options,
@@ -252,9 +222,10 @@ Status BlobSource::GetBlob(const ReadOptions& read_options,
252
222
  return Status::Corruption("Compression type mismatch when reading blob");
253
223
  }
254
224
 
255
- MemoryAllocator* const allocator = (blob_cache_ && read_options.fill_cache)
256
- ? blob_cache_->memory_allocator()
257
- : nullptr;
225
+ MemoryAllocator* const allocator =
226
+ (blob_cache_ && read_options.fill_cache)
227
+ ? blob_cache_.get()->memory_allocator()
228
+ : nullptr;
258
229
 
259
230
  uint64_t read_size = 0;
260
231
  s = blob_file_reader.GetValue()->GetBlob(
@@ -418,9 +389,10 @@ void BlobSource::MultiGetBlobFromOneFile(const ReadOptions& read_options,
418
389
 
419
390
  assert(blob_file_reader.GetValue());
420
391
 
421
- MemoryAllocator* const allocator = (blob_cache_ && read_options.fill_cache)
422
- ? blob_cache_->memory_allocator()
423
- : nullptr;
392
+ MemoryAllocator* const allocator =
393
+ (blob_cache_ && read_options.fill_cache)
394
+ ? blob_cache_.get()->memory_allocator()
395
+ : nullptr;
424
396
 
425
397
  blob_file_reader.GetValue()->MultiGetBlob(read_options, allocator,
426
398
  _blob_reqs, &_bytes_read);
@@ -8,8 +8,9 @@
8
8
  #include <cinttypes>
9
9
  #include <memory>
10
10
 
11
- #include "cache/cache_helpers.h"
12
11
  #include "cache/cache_key.h"
12
+ #include "cache/typed_cache.h"
13
+ #include "db/blob/blob_contents.h"
13
14
  #include "db/blob/blob_file_cache.h"
14
15
  #include "db/blob/blob_read_request.h"
15
16
  #include "rocksdb/cache.h"
@@ -23,7 +24,6 @@ struct ImmutableOptions;
23
24
  class Status;
24
25
  class FilePrefetchBuffer;
25
26
  class Slice;
26
- class BlobContents;
27
27
 
28
28
  // BlobSource is a class that provides universal access to blobs, regardless of
29
29
  // whether they are in the blob cache, secondary cache, or (remote) storage.
@@ -106,6 +106,14 @@ class BlobSource {
106
106
  bool TEST_BlobInCache(uint64_t file_number, uint64_t file_size,
107
107
  uint64_t offset, size_t* charge = nullptr) const;
108
108
 
109
+ // For TypedSharedCacheInterface
110
+ void Create(BlobContents** out, const char* buf, size_t size,
111
+ MemoryAllocator* alloc);
112
+
113
+ using SharedCacheInterface =
114
+ FullTypedSharedCacheInterface<BlobContents, BlobContentsCreator>;
115
+ using TypedHandle = SharedCacheInterface::TypedHandle;
116
+
109
117
  private:
110
118
  Status GetBlobFromCache(const Slice& cache_key,
111
119
  CacheHandleGuard<BlobContents>* cached_blob) const;
@@ -120,10 +128,10 @@ class BlobSource {
120
128
  static void PinOwnedBlob(std::unique_ptr<BlobContents>* owned_blob,
121
129
  PinnableSlice* value);
122
130
 
123
- Cache::Handle* GetEntryFromCache(const Slice& key) const;
131
+ TypedHandle* GetEntryFromCache(const Slice& key) const;
124
132
 
125
133
  Status InsertEntryIntoCache(const Slice& key, BlobContents* value,
126
- size_t charge, Cache::Handle** cache_handle,
134
+ TypedHandle** cache_handle,
127
135
  Cache::Priority priority) const;
128
136
 
129
137
  inline CacheKey GetCacheKey(uint64_t file_number, uint64_t /*file_size*/,
@@ -141,7 +149,7 @@ class BlobSource {
141
149
  BlobFileCache* blob_file_cache_;
142
150
 
143
151
  // A cache to store uncompressed blobs.
144
- std::shared_ptr<Cache> blob_cache_;
152
+ mutable SharedCacheInterface blob_cache_;
145
153
 
146
154
  // The control option of how the cache tiers will be used. Currently rocksdb
147
155
  // support block/blob cache (volatile tier) and secondary cache (this tier
@@ -1150,15 +1150,6 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) {
1150
1150
  auto blob_cache = options_.blob_cache;
1151
1151
  auto secondary_cache = lru_cache_opts_.secondary_cache;
1152
1152
 
1153
- Cache::CreateCallback create_cb = [](const void* buf, size_t size,
1154
- void** out_obj,
1155
- size_t* charge) -> Status {
1156
- CacheAllocationPtr allocation(new char[size]);
1157
-
1158
- return BlobContents::CreateCallback(std::move(allocation), buf, size,
1159
- out_obj, charge);
1160
- };
1161
-
1162
1153
  {
1163
1154
  // GetBlob
1164
1155
  std::vector<PinnableSlice> values(keys.size());
@@ -1219,14 +1210,15 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) {
1219
1210
  {
1220
1211
  CacheKey cache_key = base_cache_key.WithOffset(blob_offsets[0]);
1221
1212
  const Slice key0 = cache_key.AsSlice();
1222
- auto handle0 = blob_cache->Lookup(key0, statistics);
1213
+ auto handle0 = blob_cache->BasicLookup(key0, statistics);
1223
1214
  ASSERT_EQ(handle0, nullptr);
1224
1215
 
1225
1216
  // key0's item should be in the secondary cache.
1226
1217
  bool is_in_sec_cache = false;
1227
- auto sec_handle0 =
1228
- secondary_cache->Lookup(key0, create_cb, true,
1229
- /*advise_erase=*/true, is_in_sec_cache);
1218
+ auto sec_handle0 = secondary_cache->Lookup(
1219
+ key0, &BlobSource::SharedCacheInterface::kFullHelper,
1220
+ /*context*/ nullptr, true,
1221
+ /*advise_erase=*/true, is_in_sec_cache);
1230
1222
  ASSERT_FALSE(is_in_sec_cache);
1231
1223
  ASSERT_NE(sec_handle0, nullptr);
1232
1224
  ASSERT_TRUE(sec_handle0->IsReady());
@@ -1246,14 +1238,15 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) {
1246
1238
  {
1247
1239
  CacheKey cache_key = base_cache_key.WithOffset(blob_offsets[1]);
1248
1240
  const Slice key1 = cache_key.AsSlice();
1249
- auto handle1 = blob_cache->Lookup(key1, statistics);
1241
+ auto handle1 = blob_cache->BasicLookup(key1, statistics);
1250
1242
  ASSERT_NE(handle1, nullptr);
1251
1243
  blob_cache->Release(handle1);
1252
1244
 
1253
1245
  bool is_in_sec_cache = false;
1254
- auto sec_handle1 =
1255
- secondary_cache->Lookup(key1, create_cb, true,
1256
- /*advise_erase=*/true, is_in_sec_cache);
1246
+ auto sec_handle1 = secondary_cache->Lookup(
1247
+ key1, &BlobSource::SharedCacheInterface::kFullHelper,
1248
+ /*context*/ nullptr, true,
1249
+ /*advise_erase=*/true, is_in_sec_cache);
1257
1250
  ASSERT_FALSE(is_in_sec_cache);
1258
1251
  ASSERT_EQ(sec_handle1, nullptr);
1259
1252
 
@@ -1276,7 +1269,7 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) {
1276
1269
  // key0 should be in the primary cache.
1277
1270
  CacheKey cache_key0 = base_cache_key.WithOffset(blob_offsets[0]);
1278
1271
  const Slice key0 = cache_key0.AsSlice();
1279
- auto handle0 = blob_cache->Lookup(key0, statistics);
1272
+ auto handle0 = blob_cache->BasicLookup(key0, statistics);
1280
1273
  ASSERT_NE(handle0, nullptr);
1281
1274
  auto value = static_cast<BlobContents*>(blob_cache->Value(handle0));
1282
1275
  ASSERT_NE(value, nullptr);
@@ -1286,12 +1279,12 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) {
1286
1279
  // key1 is not in the primary cache and is in the secondary cache.
1287
1280
  CacheKey cache_key1 = base_cache_key.WithOffset(blob_offsets[1]);
1288
1281
  const Slice key1 = cache_key1.AsSlice();
1289
- auto handle1 = blob_cache->Lookup(key1, statistics);
1282
+ auto handle1 = blob_cache->BasicLookup(key1, statistics);
1290
1283
  ASSERT_EQ(handle1, nullptr);
1291
1284
 
1292
1285
  // erase key0 from the primary cache.
1293
1286
  blob_cache->Erase(key0);
1294
- handle0 = blob_cache->Lookup(key0, statistics);
1287
+ handle0 = blob_cache->BasicLookup(key0, statistics);
1295
1288
  ASSERT_EQ(handle0, nullptr);
1296
1289
 
1297
1290
  // key1 promotion should succeed due to the primary cache being empty. we
@@ -1307,7 +1300,7 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) {
1307
1300
  // in the secondary cache. So, the primary cache's Lookup() without
1308
1301
  // secondary cache support cannot see it. (NOTE: The dummy handle used
1309
1302
  // to be a leaky abstraction but not anymore.)
1310
- handle1 = blob_cache->Lookup(key1, statistics);
1303
+ handle1 = blob_cache->BasicLookup(key1, statistics);
1311
1304
  ASSERT_EQ(handle1, nullptr);
1312
1305
 
1313
1306
  // But after another access, it is promoted to primary cache
@@ -1315,7 +1308,7 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) {
1315
1308
  blob_offsets[1]));
1316
1309
 
1317
1310
  // And Lookup() can find it (without secondary cache support)
1318
- handle1 = blob_cache->Lookup(key1, statistics);
1311
+ handle1 = blob_cache->BasicLookup(key1, statistics);
1319
1312
  ASSERT_NE(handle1, nullptr);
1320
1313
  ASSERT_NE(blob_cache->Value(handle1), nullptr);
1321
1314
  blob_cache->Release(handle1);
@@ -71,8 +71,9 @@ Status BuildTable(
71
71
  int job_id, const Env::IOPriority io_priority,
72
72
  TableProperties* table_properties, Env::WriteLifeTimeHint write_hint,
73
73
  const std::string* full_history_ts_low,
74
- BlobFileCompletionCallback* blob_callback, uint64_t* num_input_entries,
75
- uint64_t* memtable_payload_bytes, uint64_t* memtable_garbage_bytes) {
74
+ BlobFileCompletionCallback* blob_callback, Version* version,
75
+ uint64_t* num_input_entries, uint64_t* memtable_payload_bytes,
76
+ uint64_t* memtable_garbage_bytes) {
76
77
  assert((tboptions.column_family_id ==
77
78
  TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) ==
78
79
  tboptions.column_family_name.empty());
@@ -175,10 +176,10 @@ Status BuildTable(
175
176
  builder = NewTableBuilder(tboptions, file_writer.get());
176
177
  }
177
178
 
179
+ auto ucmp = tboptions.internal_comparator.user_comparator();
178
180
  MergeHelper merge(
179
- env, tboptions.internal_comparator.user_comparator(),
180
- ioptions.merge_operator.get(), compaction_filter.get(), ioptions.logger,
181
- true /* internal key corruption is not ok */,
181
+ env, ucmp, ioptions.merge_operator.get(), compaction_filter.get(),
182
+ ioptions.logger, true /* internal key corruption is not ok */,
182
183
  snapshots.empty() ? 0 : snapshots.back(), snapshot_checker);
183
184
 
184
185
  std::unique_ptr<BlobFileBuilder> blob_file_builder(
@@ -196,9 +197,8 @@ Status BuildTable(
196
197
 
197
198
  const std::atomic<bool> kManualCompactionCanceledFalse{false};
198
199
  CompactionIterator c_iter(
199
- iter, tboptions.internal_comparator.user_comparator(), &merge,
200
- kMaxSequenceNumber, &snapshots, earliest_write_conflict_snapshot,
201
- job_snapshot, snapshot_checker, env,
200
+ iter, ucmp, &merge, kMaxSequenceNumber, &snapshots,
201
+ earliest_write_conflict_snapshot, job_snapshot, snapshot_checker, env,
202
202
  ShouldReportDetailedTime(env, ioptions.stats),
203
203
  true /* internal key corruption is not ok */, range_del_agg.get(),
204
204
  blob_file_builder.get(), ioptions.allow_data_in_errors,
@@ -241,14 +241,28 @@ Status BuildTable(
241
241
 
242
242
  if (s.ok()) {
243
243
  auto range_del_it = range_del_agg->NewIterator();
244
+ Slice last_tombstone_start_user_key{};
244
245
  for (range_del_it->SeekToFirst(); range_del_it->Valid();
245
246
  range_del_it->Next()) {
246
247
  auto tombstone = range_del_it->Tombstone();
247
248
  auto kv = tombstone.Serialize();
248
249
  builder->Add(kv.first.Encode(), kv.second);
249
- meta->UpdateBoundariesForRange(kv.first, tombstone.SerializeEndKey(),
250
- tombstone.seq_,
250
+ InternalKey tombstone_end = tombstone.SerializeEndKey();
251
+ meta->UpdateBoundariesForRange(kv.first, tombstone_end, tombstone.seq_,
251
252
  tboptions.internal_comparator);
253
+ if (version) {
254
+ if (last_tombstone_start_user_key.empty() ||
255
+ ucmp->CompareWithoutTimestamp(last_tombstone_start_user_key,
256
+ range_del_it->start_key()) < 0) {
257
+ SizeApproximationOptions approx_opts;
258
+ approx_opts.files_size_error_margin = 0.1;
259
+ meta->compensated_range_deletion_size += versions->ApproximateSize(
260
+ approx_opts, version, kv.first.Encode(), tombstone_end.Encode(),
261
+ 0 /* start_level */, -1 /* end_level */,
262
+ TableReaderCaller::kFlush);
263
+ }
264
+ last_tombstone_start_user_key = range_del_it->start_key();
265
+ }
252
266
  }
253
267
  }
254
268
 
@@ -13,6 +13,7 @@
13
13
  #include "db/range_tombstone_fragmenter.h"
14
14
  #include "db/seqno_to_time_mapping.h"
15
15
  #include "db/table_properties_collector.h"
16
+ #include "db/version_set.h"
16
17
  #include "logging/event_logger.h"
17
18
  #include "options/cf_options.h"
18
19
  #include "rocksdb/comparator.h"
@@ -70,7 +71,7 @@ extern Status BuildTable(
70
71
  Env::WriteLifeTimeHint write_hint = Env::WLTH_NOT_SET,
71
72
  const std::string* full_history_ts_low = nullptr,
72
73
  BlobFileCompletionCallback* blob_callback = nullptr,
73
- uint64_t* num_input_entries = nullptr,
74
+ Version* version = nullptr, uint64_t* num_input_entries = nullptr,
74
75
  uint64_t* memtable_payload_bytes = nullptr,
75
76
  uint64_t* memtable_garbage_bytes = nullptr);
76
77
 
@@ -2588,6 +2588,12 @@ void rocksdb_block_based_options_set_partition_filters(
2588
2588
  options->rep.partition_filters = partition_filters;
2589
2589
  }
2590
2590
 
2591
+ void rocksdb_block_based_options_set_optimize_filters_for_memory(
2592
+ rocksdb_block_based_table_options_t* options,
2593
+ unsigned char optimize_filters_for_memory) {
2594
+ options->rep.optimize_filters_for_memory = optimize_filters_for_memory;
2595
+ }
2596
+
2591
2597
  void rocksdb_block_based_options_set_use_delta_encoding(
2592
2598
  rocksdb_block_based_table_options_t* options,
2593
2599
  unsigned char use_delta_encoding) {
@@ -4443,6 +4449,15 @@ rocksdb_readoptions_get_io_timeout(rocksdb_readoptions_t* opt) {
4443
4449
  return opt->rep.io_timeout.count();
4444
4450
  }
4445
4451
 
4452
+ void rocksdb_readoptions_set_async_io(rocksdb_readoptions_t* opt,
4453
+ unsigned char v) {
4454
+ opt->rep.async_io = v;
4455
+ }
4456
+
4457
+ unsigned char rocksdb_readoptions_get_async_io(rocksdb_readoptions_t* opt) {
4458
+ return opt->rep.async_io;
4459
+ }
4460
+
4446
4461
  void rocksdb_readoptions_set_timestamp(rocksdb_readoptions_t* opt,
4447
4462
  const char* ts, size_t tslen) {
4448
4463
  if (ts == nullptr) {
@@ -2572,6 +2572,9 @@ int main(int argc, char** argv) {
2572
2572
  rocksdb_readoptions_set_io_timeout(ro, 400);
2573
2573
  CheckCondition(400 == rocksdb_readoptions_get_io_timeout(ro));
2574
2574
 
2575
+ rocksdb_readoptions_set_async_io(ro, 1);
2576
+ CheckCondition(1 == rocksdb_readoptions_get_async_io(ro));
2577
+
2575
2578
  rocksdb_readoptions_destroy(ro);
2576
2579
  }
2577
2580
 
@@ -557,7 +557,6 @@ ColumnFamilyData::ColumnFamilyData(
557
557
  next_(nullptr),
558
558
  prev_(nullptr),
559
559
  log_number_(0),
560
- flush_reason_(FlushReason::kOthers),
561
560
  column_family_set_(column_family_set),
562
561
  queued_for_flush_(false),
563
562
  queued_for_compaction_(false),
@@ -565,7 +564,8 @@ ColumnFamilyData::ColumnFamilyData(
565
564
  allow_2pc_(db_options.allow_2pc),
566
565
  last_memtable_id_(0),
567
566
  db_paths_registered_(false),
568
- mempurge_used_(false) {
567
+ mempurge_used_(false),
568
+ next_epoch_number_(1) {
569
569
  if (id_ != kDummyColumnFamilyDataId) {
570
570
  // TODO(cc): RegisterDbPaths can be expensive, considering moving it
571
571
  // outside of this constructor which might be called with db mutex held.
@@ -1128,12 +1128,9 @@ bool ColumnFamilyData::NeedsCompaction() const {
1128
1128
  Compaction* ColumnFamilyData::PickCompaction(
1129
1129
  const MutableCFOptions& mutable_options,
1130
1130
  const MutableDBOptions& mutable_db_options, LogBuffer* log_buffer) {
1131
- SequenceNumber earliest_mem_seqno =
1132
- std::min(mem_->GetEarliestSequenceNumber(),
1133
- imm_.current()->GetEarliestSequenceNumber(false));
1134
1131
  auto* result = compaction_picker_->PickCompaction(
1135
1132
  GetName(), mutable_options, mutable_db_options, current_->storage_info(),
1136
- log_buffer, earliest_mem_seqno);
1133
+ log_buffer);
1137
1134
  if (result != nullptr) {
1138
1135
  result->SetInputVersion(current_);
1139
1136
  }
@@ -1220,6 +1217,7 @@ Compaction* ColumnFamilyData::CompactRange(
1220
1217
  if (result != nullptr) {
1221
1218
  result->SetInputVersion(current_);
1222
1219
  }
1220
+ TEST_SYNC_POINT("ColumnFamilyData::CompactRange:Return");
1223
1221
  return result;
1224
1222
  }
1225
1223
 
@@ -1520,6 +1518,13 @@ FSDirectory* ColumnFamilyData::GetDataDir(size_t path_id) const {
1520
1518
  return data_dirs_[path_id].get();
1521
1519
  }
1522
1520
 
1521
+ void ColumnFamilyData::RecoverEpochNumbers() {
1522
+ assert(current_);
1523
+ auto* vstorage = current_->storage_info();
1524
+ assert(vstorage);
1525
+ vstorage->RecoverEpochNumbers(this);
1526
+ }
1527
+
1523
1528
  ColumnFamilySet::ColumnFamilySet(const std::string& dbname,
1524
1529
  const ImmutableDBOptions* db_options,
1525
1530
  const FileOptions& file_options,
@@ -310,10 +310,6 @@ class ColumnFamilyData {
310
310
  void SetLogNumber(uint64_t log_number) { log_number_ = log_number; }
311
311
  uint64_t GetLogNumber() const { return log_number_; }
312
312
 
313
- void SetFlushReason(FlushReason flush_reason) {
314
- flush_reason_ = flush_reason;
315
- }
316
- FlushReason GetFlushReason() const { return flush_reason_; }
317
313
  // thread-safe
318
314
  const FileOptions* soptions() const;
319
315
  const ImmutableOptions* ioptions() const { return &ioptions_; }
@@ -533,6 +529,24 @@ class ColumnFamilyData {
533
529
  void SetMempurgeUsed() { mempurge_used_ = true; }
534
530
  bool GetMempurgeUsed() { return mempurge_used_; }
535
531
 
532
+ // Allocate and return a new epoch number
533
+ uint64_t NewEpochNumber() { return next_epoch_number_.fetch_add(1); }
534
+
535
+ // Get the next epoch number to be assigned
536
+ uint64_t GetNextEpochNumber() const { return next_epoch_number_.load(); }
537
+
538
+ // Set the next epoch number to be assigned
539
+ void SetNextEpochNumber(uint64_t next_epoch_number) {
540
+ next_epoch_number_.store(next_epoch_number);
541
+ }
542
+
543
+ // Reset the next epoch number to be assigned
544
+ void ResetNextEpochNumber() { next_epoch_number_.store(1); }
545
+
546
+ // Recover the next epoch number of this CF and epoch number
547
+ // of its files (if missing)
548
+ void RecoverEpochNumbers();
549
+
536
550
  private:
537
551
  friend class ColumnFamilySet;
538
552
  ColumnFamilyData(uint32_t id, const std::string& name,
@@ -598,8 +612,6 @@ class ColumnFamilyData {
598
612
  // recovered from
599
613
  uint64_t log_number_;
600
614
 
601
- std::atomic<FlushReason> flush_reason_;
602
-
603
615
  // An object that keeps all the compaction stats
604
616
  // and picks the next compaction
605
617
  std::unique_ptr<CompactionPicker> compaction_picker_;
@@ -634,6 +646,8 @@ class ColumnFamilyData {
634
646
  // a Version associated with this CFD
635
647
  std::shared_ptr<CacheReservationManager> file_metadata_cache_res_mgr_;
636
648
  bool mempurge_used_;
649
+
650
+ std::atomic<uint64_t> next_epoch_number_;
637
651
  };
638
652
 
639
653
  // ColumnFamilySet has interesting thread-safety requirements
@@ -238,12 +238,19 @@ Compaction::Compaction(
238
238
  inputs_(PopulateWithAtomicBoundaries(vstorage, std::move(_inputs))),
239
239
  grandparents_(std::move(_grandparents)),
240
240
  score_(_score),
241
- bottommost_level_(IsBottommostLevel(output_level_, vstorage, inputs_)),
241
+ bottommost_level_(
242
+ // For simplicity, we don't support the concept of "bottommost level"
243
+ // with
244
+ // `CompactionReason::kExternalSstIngestion` and
245
+ // `CompactionReason::kRefitLevel`
246
+ (_compaction_reason == CompactionReason::kExternalSstIngestion ||
247
+ _compaction_reason == CompactionReason::kRefitLevel)
248
+ ? false
249
+ : IsBottommostLevel(output_level_, vstorage, inputs_)),
242
250
  is_full_compaction_(IsFullCompaction(vstorage, inputs_)),
243
251
  is_manual_compaction_(_manual_compaction),
244
252
  trim_ts_(_trim_ts),
245
253
  is_trivial_move_(false),
246
-
247
254
  compaction_reason_(_compaction_reason),
248
255
  notify_on_compaction_completion_(false),
249
256
  enable_blob_garbage_collection_(
@@ -258,8 +265,15 @@ Compaction::Compaction(
258
265
  _blob_garbage_collection_age_cutoff > 1
259
266
  ? mutable_cf_options()->blob_garbage_collection_age_cutoff
260
267
  : _blob_garbage_collection_age_cutoff),
261
- penultimate_level_(EvaluatePenultimateLevel(
262
- vstorage, immutable_options_, start_level_, output_level_)) {
268
+ penultimate_level_(
269
+ // For simplicity, we don't support the concept of "penultimate level"
270
+ // with `CompactionReason::kExternalSstIngestion` and
271
+ // `CompactionReason::kRefitLevel`
272
+ _compaction_reason == CompactionReason::kExternalSstIngestion ||
273
+ _compaction_reason == CompactionReason::kRefitLevel
274
+ ? Compaction::kInvalidLevel
275
+ : EvaluatePenultimateLevel(vstorage, immutable_options_,
276
+ start_level_, output_level_)) {
263
277
  MarkFilesBeingCompacted(true);
264
278
  if (is_manual_compaction_) {
265
279
  compaction_reason_ = CompactionReason::kManualCompaction;
@@ -332,6 +346,7 @@ void Compaction::PopulatePenultimateLevelOutputRange() {
332
346
  // the case that the penultimate level is empty).
333
347
  if (immutable_options_.compaction_style == kCompactionStyleUniversal) {
334
348
  exclude_level = kInvalidLevel;
349
+ penultimate_output_range_type_ = PenultimateOutputRangeType::kFullRange;
335
350
  std::set<uint64_t> penultimate_inputs;
336
351
  for (const auto& input_lvl : inputs_) {
337
352
  if (input_lvl.level == penultimate_level_) {
@@ -345,7 +360,8 @@ void Compaction::PopulatePenultimateLevelOutputRange() {
345
360
  if (penultimate_inputs.find(file->fd.GetNumber()) ==
346
361
  penultimate_inputs.end()) {
347
362
  exclude_level = number_levels_ - 1;
348
- penultimate_output_range_type_ = PenultimateOutputRangeType::kFullRange;
363
+ penultimate_output_range_type_ =
364
+ PenultimateOutputRangeType::kNonLastRange;
349
365
  break;
350
366
  }
351
367
  }
@@ -354,35 +370,6 @@ void Compaction::PopulatePenultimateLevelOutputRange() {
354
370
  GetBoundaryKeys(input_vstorage_, inputs_,
355
371
  &penultimate_level_smallest_user_key_,
356
372
  &penultimate_level_largest_user_key_, exclude_level);
357
-
358
- // If there's a case that the penultimate level output range is overlapping
359
- // with the existing files, disable the penultimate level output by setting
360
- // the range to empty. One example is the range delete could have overlap
361
- // boundary with the next file. (which is actually a false overlap)
362
- // TODO: Exclude such false overlap, so it won't disable the penultimate
363
- // output.
364
- std::set<uint64_t> penultimate_inputs;
365
- for (const auto& input_lvl : inputs_) {
366
- if (input_lvl.level == penultimate_level_) {
367
- for (const auto& file : input_lvl.files) {
368
- penultimate_inputs.emplace(file->fd.GetNumber());
369
- }
370
- }
371
- }
372
-
373
- auto penultimate_files = input_vstorage_->LevelFiles(penultimate_level_);
374
- for (const auto& file : penultimate_files) {
375
- if (penultimate_inputs.find(file->fd.GetNumber()) ==
376
- penultimate_inputs.end() &&
377
- OverlapPenultimateLevelOutputRange(file->smallest.user_key(),
378
- file->largest.user_key())) {
379
- // basically disable the penultimate range output. which should be rare
380
- // or a false overlap caused by range del
381
- penultimate_level_smallest_user_key_ = "";
382
- penultimate_level_largest_user_key_ = "";
383
- penultimate_output_range_type_ = PenultimateOutputRangeType::kDisabled;
384
- }
385
- }
386
373
  }
387
374
 
388
375
  Compaction::~Compaction() {
@@ -807,6 +794,16 @@ uint64_t Compaction::MinInputFileOldestAncesterTime(
807
794
  return min_oldest_ancester_time;
808
795
  }
809
796
 
797
+ uint64_t Compaction::MinInputFileEpochNumber() const {
798
+ uint64_t min_epoch_number = std::numeric_limits<uint64_t>::max();
799
+ for (const auto& inputs_per_level : inputs_) {
800
+ for (const auto& file : inputs_per_level.files) {
801
+ min_epoch_number = std::min(min_epoch_number, file->epoch_number);
802
+ }
803
+ }
804
+ return min_epoch_number;
805
+ }
806
+
810
807
  int Compaction::EvaluatePenultimateLevel(
811
808
  const VersionStorageInfo* vstorage,
812
809
  const ImmutableOptions& immutable_options, const int start_level,
@@ -378,6 +378,9 @@ class Compaction {
378
378
  // This is used to filter out some input files' ancester's time range.
379
379
  uint64_t MinInputFileOldestAncesterTime(const InternalKey* start,
380
380
  const InternalKey* end) const;
381
+ // Return the minimum epoch number among
382
+ // input files' associated with this compaction
383
+ uint64_t MinInputFileEpochNumber() const;
381
384
 
382
385
  // Called by DBImpl::NotifyOnCompactionCompleted to make sure number of
383
386
  // compaction begin and compaction completion callbacks match.