@nxtedition/rocksdb 8.1.4 → 8.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223)
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +21 -0
  2. package/deps/rocksdb/rocksdb/Makefile +15 -3
  3. package/deps/rocksdb/rocksdb/TARGETS +6 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +32 -35
  5. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +0 -30
  6. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.h +0 -83
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +13 -14
  8. package/deps/rocksdb/rocksdb/cache/cache_helpers.cc +40 -0
  9. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +14 -20
  10. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +8 -9
  11. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +5 -4
  12. package/deps/rocksdb/rocksdb/cache/cache_test.cc +124 -156
  13. package/deps/rocksdb/rocksdb/cache/charged_cache.cc +10 -26
  14. package/deps/rocksdb/rocksdb/cache/charged_cache.h +11 -16
  15. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +35 -32
  16. package/deps/rocksdb/rocksdb/cache/clock_cache.h +19 -21
  17. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +42 -30
  18. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +9 -8
  19. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +91 -143
  20. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +54 -60
  21. package/deps/rocksdb/rocksdb/cache/lru_cache.h +37 -63
  22. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +120 -106
  23. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +14 -5
  24. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +16 -31
  25. package/deps/rocksdb/rocksdb/cache/typed_cache.h +339 -0
  26. package/deps/rocksdb/rocksdb/db/blob/blob_contents.cc +0 -48
  27. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +18 -15
  28. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +5 -26
  29. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +7 -8
  30. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +6 -3
  31. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +2 -7
  32. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +19 -47
  33. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +13 -5
  34. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +15 -22
  35. package/deps/rocksdb/rocksdb/db/builder.cc +24 -10
  36. package/deps/rocksdb/rocksdb/db/builder.h +2 -1
  37. package/deps/rocksdb/rocksdb/db/c.cc +15 -0
  38. package/deps/rocksdb/rocksdb/db/c_test.c +3 -0
  39. package/deps/rocksdb/rocksdb/db/column_family.cc +11 -6
  40. package/deps/rocksdb/rocksdb/db/column_family.h +20 -6
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +31 -34
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +3 -0
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +21 -3
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +1 -0
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +4 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +4 -2
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +9 -6
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +275 -82
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -18
  51. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +17 -16
  52. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +19 -6
  53. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +5 -5
  54. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +22 -22
  55. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +5 -5
  56. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +81 -52
  57. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -1
  58. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +5 -5
  59. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +8 -2
  60. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +3 -0
  61. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +266 -138
  62. package/deps/rocksdb/rocksdb/db/corruption_test.cc +86 -1
  63. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +98 -9
  64. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +28 -28
  65. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +2 -3
  66. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +1022 -123
  67. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +65 -4
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +32 -21
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +32 -24
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +199 -77
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +1 -1
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +3 -2
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -0
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +8 -4
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +43 -23
  76. package/deps/rocksdb/rocksdb/db/db_iter.cc +8 -2
  77. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +42 -0
  78. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +155 -0
  79. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +12 -12
  80. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +230 -2
  81. package/deps/rocksdb/rocksdb/db/db_test.cc +3 -0
  82. package/deps/rocksdb/rocksdb/db/db_test2.cc +233 -8
  83. package/deps/rocksdb/rocksdb/db/db_test_util.cc +11 -10
  84. package/deps/rocksdb/rocksdb/db/db_test_util.h +39 -24
  85. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +129 -0
  86. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +28 -0
  87. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +21 -0
  88. package/deps/rocksdb/rocksdb/db/dbformat.cc +25 -0
  89. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -0
  90. package/deps/rocksdb/rocksdb/db/experimental.cc +3 -2
  91. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +3 -0
  92. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +92 -13
  93. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +38 -1
  94. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +14 -110
  95. package/deps/rocksdb/rocksdb/db/flush_job.cc +12 -10
  96. package/deps/rocksdb/rocksdb/db/flush_job.h +3 -2
  97. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +29 -29
  98. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +56 -53
  99. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +3 -4
  100. package/deps/rocksdb/rocksdb/db/internal_stats.cc +11 -11
  101. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -2
  102. package/deps/rocksdb/rocksdb/db/log_reader.cc +8 -6
  103. package/deps/rocksdb/rocksdb/db/log_test.cc +35 -2
  104. package/deps/rocksdb/rocksdb/db/memtable.cc +31 -6
  105. package/deps/rocksdb/rocksdb/db/merge_helper.cc +47 -29
  106. package/deps/rocksdb/rocksdb/db/merge_helper.h +14 -6
  107. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +10 -10
  108. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/repair.cc +65 -22
  110. package/deps/rocksdb/rocksdb/db/repair_test.cc +54 -0
  111. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +26 -26
  112. package/deps/rocksdb/rocksdb/db/table_cache.cc +41 -91
  113. package/deps/rocksdb/rocksdb/db/table_cache.h +17 -19
  114. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -9
  115. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -1
  116. package/deps/rocksdb/rocksdb/db/version_builder.cc +102 -52
  117. package/deps/rocksdb/rocksdb/db/version_builder.h +20 -0
  118. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +218 -93
  119. package/deps/rocksdb/rocksdb/db/version_edit.cc +27 -1
  120. package/deps/rocksdb/rocksdb/db/version_edit.h +34 -9
  121. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +13 -6
  122. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +17 -6
  123. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +19 -17
  124. package/deps/rocksdb/rocksdb/db/version_set.cc +160 -28
  125. package/deps/rocksdb/rocksdb/db/version_set.h +34 -4
  126. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -1
  127. package/deps/rocksdb/rocksdb/db/version_set_test.cc +65 -31
  128. package/deps/rocksdb/rocksdb/db/write_batch.cc +4 -1
  129. package/deps/rocksdb/rocksdb/db/write_thread.cc +5 -2
  130. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -0
  131. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -32
  132. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.h +2 -1
  133. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +8 -6
  134. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
  135. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +11 -4
  136. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +16 -15
  137. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +13 -1
  138. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -0
  139. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +286 -217
  140. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +8 -0
  141. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +137 -135
  142. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
  143. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +7 -1
  144. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +21 -0
  145. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +9 -3
  146. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +2 -1
  147. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +8 -6
  148. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +3 -0
  149. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +69 -9
  150. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  151. package/deps/rocksdb/rocksdb/memory/arena.cc +23 -87
  152. package/deps/rocksdb/rocksdb/memory/arena.h +25 -31
  153. package/deps/rocksdb/rocksdb/memory/arena_test.cc +90 -0
  154. package/deps/rocksdb/rocksdb/memory/memory_allocator.h +9 -0
  155. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +26 -26
  156. package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -3
  157. package/deps/rocksdb/rocksdb/port/mmap.cc +98 -0
  158. package/deps/rocksdb/rocksdb/port/mmap.h +70 -0
  159. package/deps/rocksdb/rocksdb/port/port_posix.h +2 -0
  160. package/{prebuilds → deps/rocksdb/rocksdb/prebuilds}/linux-x64/node.napi.node +0 -0
  161. package/deps/rocksdb/rocksdb/src.mk +3 -0
  162. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -2
  163. package/deps/rocksdb/rocksdb/table/block_based/block.h +3 -0
  164. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +25 -67
  165. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +3 -3
  166. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +18 -13
  167. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +159 -225
  168. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +31 -50
  169. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +52 -20
  170. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +3 -3
  171. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +1 -1
  172. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +96 -0
  173. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +132 -0
  174. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +28 -0
  175. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -5
  176. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +1 -4
  177. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +6 -7
  178. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +3 -1
  179. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +6 -1
  180. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +19 -18
  181. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +9 -5
  182. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +3 -1
  183. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +2 -1
  184. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +2 -2
  185. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -3
  186. package/deps/rocksdb/rocksdb/table/format.cc +24 -20
  187. package/deps/rocksdb/rocksdb/table/format.h +6 -3
  188. package/deps/rocksdb/rocksdb/table/get_context.cc +12 -3
  189. package/deps/rocksdb/rocksdb/table/internal_iterator.h +0 -2
  190. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +69 -35
  191. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +2 -2
  192. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
  193. package/deps/rocksdb/rocksdb/table/table_test.cc +7 -6
  194. package/deps/rocksdb/rocksdb/test_util/testutil.h +10 -0
  195. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +66 -1
  196. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +9 -2
  197. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +5 -0
  198. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +2 -2
  199. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +1 -1
  200. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +20 -12
  201. package/deps/rocksdb/rocksdb/util/bloom_test.cc +1 -1
  202. package/deps/rocksdb/rocksdb/util/compression.cc +2 -2
  203. package/deps/rocksdb/rocksdb/util/compression.h +11 -2
  204. package/deps/rocksdb/rocksdb/util/status.cc +7 -0
  205. package/deps/rocksdb/rocksdb/util/xxhash.h +1901 -887
  206. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +250 -74
  207. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +199 -4
  208. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +35 -57
  209. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +4 -5
  210. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -0
  211. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +39 -0
  212. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +9 -0
  213. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +11 -6
  214. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +6 -5
  215. package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +0 -1
  216. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +10 -11
  217. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +31 -31
  218. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +4 -0
  219. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +111 -0
  220. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +1 -0
  221. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +12 -3
  222. package/package.json +1 -1
  223. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +0 -182
@@ -1771,8 +1771,8 @@ void Version::GetColumnFamilyMetaData(ColumnFamilyMetaData* cf_meta) {
1771
1771
  file->stats.num_reads_sampled.load(std::memory_order_relaxed),
1772
1772
  file->being_compacted, file->temperature,
1773
1773
  file->oldest_blob_file_number, file->TryGetOldestAncesterTime(),
1774
- file->TryGetFileCreationTime(), file->file_checksum,
1775
- file->file_checksum_func_name);
1774
+ file->TryGetFileCreationTime(), file->epoch_number,
1775
+ file->file_checksum, file->file_checksum_func_name);
1776
1776
  files.back().num_entries = file->num_entries;
1777
1777
  files.back().num_deletions = file->num_deletions;
1778
1778
  level_size += file->fd.GetFileSize();
@@ -2036,7 +2036,8 @@ VersionStorageInfo::VersionStorageInfo(
2036
2036
  const InternalKeyComparator* internal_comparator,
2037
2037
  const Comparator* user_comparator, int levels,
2038
2038
  CompactionStyle compaction_style, VersionStorageInfo* ref_vstorage,
2039
- bool _force_consistency_checks)
2039
+ bool _force_consistency_checks,
2040
+ EpochNumberRequirement epoch_number_requirement)
2040
2041
  : internal_comparator_(internal_comparator),
2041
2042
  user_comparator_(user_comparator),
2042
2043
  // cfd is nullptr if Version is dummy
@@ -2064,7 +2065,8 @@ VersionStorageInfo::VersionStorageInfo(
2064
2065
  current_num_samples_(0),
2065
2066
  estimated_compaction_needed_bytes_(0),
2066
2067
  finalized_(false),
2067
- force_consistency_checks_(_force_consistency_checks) {
2068
+ force_consistency_checks_(_force_consistency_checks),
2069
+ epoch_number_requirement_(epoch_number_requirement) {
2068
2070
  if (ref_vstorage != nullptr) {
2069
2071
  accumulated_file_size_ = ref_vstorage->accumulated_file_size_;
2070
2072
  accumulated_raw_key_size_ = ref_vstorage->accumulated_raw_key_size_;
@@ -2085,7 +2087,8 @@ Version::Version(ColumnFamilyData* column_family_data, VersionSet* vset,
2085
2087
  const FileOptions& file_opt,
2086
2088
  const MutableCFOptions mutable_cf_options,
2087
2089
  const std::shared_ptr<IOTracer>& io_tracer,
2088
- uint64_t version_number)
2090
+ uint64_t version_number,
2091
+ EpochNumberRequirement epoch_number_requirement)
2089
2092
  : env_(vset->env_),
2090
2093
  clock_(vset->clock_),
2091
2094
  cfd_(column_family_data),
@@ -2104,7 +2107,8 @@ Version::Version(ColumnFamilyData* column_family_data, VersionSet* vset,
2104
2107
  (cfd_ == nullptr || cfd_->current() == nullptr)
2105
2108
  ? nullptr
2106
2109
  : cfd_->current()->storage_info(),
2107
- cfd_ == nullptr ? false : cfd_->ioptions()->force_consistency_checks),
2110
+ cfd_ == nullptr ? false : cfd_->ioptions()->force_consistency_checks,
2111
+ epoch_number_requirement),
2108
2112
  vset_(vset),
2109
2113
  next_(this),
2110
2114
  prev_(this),
@@ -2386,10 +2390,13 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
2386
2390
  // do a final merge of nullptr and operands;
2387
2391
  if (value || columns) {
2388
2392
  std::string result;
2393
+ // `op_failure_scope` (an output parameter) is not provided (set to
2394
+ // nullptr) since a failure must be propagated regardless of its value.
2389
2395
  *status = MergeHelper::TimedFullMerge(
2390
2396
  merge_operator_, user_key, nullptr, merge_context->GetOperands(),
2391
2397
  &result, info_log_, db_statistics_, clock_,
2392
- /* result_operand */ nullptr, /* update_num_ops_stats */ true);
2398
+ /* result_operand */ nullptr, /* update_num_ops_stats */ true,
2399
+ /* op_failure_scope */ nullptr);
2393
2400
  if (status->ok()) {
2394
2401
  if (LIKELY(value != nullptr)) {
2395
2402
  *(value->GetSelf()) = std::move(result);
@@ -2505,7 +2512,7 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
2505
2512
  std::vector<folly::coro::Task<Status>> mget_tasks;
2506
2513
  while (f != nullptr) {
2507
2514
  MultiGetRange file_range = fp.CurrentFileRange();
2508
- Cache::Handle* table_handle = nullptr;
2515
+ TableCache::TypedHandle* table_handle = nullptr;
2509
2516
  bool skip_filters =
2510
2517
  IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel()),
2511
2518
  fp.IsHitFileLastInLevel());
@@ -2634,10 +2641,13 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
2634
2641
  // do a final merge of nullptr and operands;
2635
2642
  std::string* str_value =
2636
2643
  iter->value != nullptr ? iter->value->GetSelf() : nullptr;
2644
+ // `op_failure_scope` (an output parameter) is not provided (set to
2645
+ // nullptr) since a failure must be propagated regardless of its value.
2637
2646
  *status = MergeHelper::TimedFullMerge(
2638
2647
  merge_operator_, user_key, nullptr, iter->merge_context.GetOperands(),
2639
2648
  str_value, info_log_, db_statistics_, clock_,
2640
- /* result_operand */ nullptr, /* update_num_ops_stats */ true);
2649
+ /* result_operand */ nullptr, /* update_num_ops_stats */ true,
2650
+ /* op_failure_scope */ nullptr);
2641
2651
  if (LIKELY(iter->value != nullptr)) {
2642
2652
  iter->value->PinSelf();
2643
2653
  range->AddValueSize(iter->value->size());
@@ -2687,7 +2697,7 @@ Status Version::ProcessBatch(
2687
2697
  }
2688
2698
  while (f) {
2689
2699
  MultiGetRange file_range = fp.CurrentFileRange();
2690
- Cache::Handle* table_handle = nullptr;
2700
+ TableCache::TypedHandle* table_handle = nullptr;
2691
2701
  bool skip_filters = IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel()),
2692
2702
  fp.IsHitFileLastInLevel());
2693
2703
  bool skip_range_deletions = false;
@@ -2954,7 +2964,7 @@ bool Version::MaybeInitializeFileMetaData(FileMetaData* file_meta) {
2954
2964
  file_meta->num_deletions = tp->num_deletions;
2955
2965
  file_meta->raw_value_size = tp->raw_value_size;
2956
2966
  file_meta->raw_key_size = tp->raw_key_size;
2957
-
2967
+ file_meta->num_range_deletions = tp->num_range_deletions;
2958
2968
  return true;
2959
2969
  }
2960
2970
 
@@ -3056,11 +3066,15 @@ void VersionStorageInfo::ComputeCompensatedSizes() {
3056
3066
  // size of deletion entries in a stable workload, the deletion
3057
3067
  // compensation logic might introduce unwanted effet which changes the
3058
3068
  // shape of LSM tree.
3059
- if (file_meta->num_deletions * 2 >= file_meta->num_entries) {
3069
+ if ((file_meta->num_deletions - file_meta->num_range_deletions) * 2 >=
3070
+ file_meta->num_entries) {
3060
3071
  file_meta->compensated_file_size +=
3061
- (file_meta->num_deletions * 2 - file_meta->num_entries) *
3072
+ ((file_meta->num_deletions - file_meta->num_range_deletions) * 2 -
3073
+ file_meta->num_entries) *
3062
3074
  average_value_size * kDeletionWeightOnCompaction;
3063
3075
  }
3076
+ file_meta->compensated_file_size +=
3077
+ file_meta->compensated_range_deletion_size;
3064
3078
  }
3065
3079
  }
3066
3080
  }
@@ -4280,6 +4294,74 @@ const char* VersionStorageInfo::LevelFileSummary(FileSummaryStorage* scratch,
4280
4294
  return scratch->buffer;
4281
4295
  }
4282
4296
 
4297
+ bool VersionStorageInfo::HasMissingEpochNumber() const {
4298
+ for (int level = 0; level < num_levels_; ++level) {
4299
+ for (const FileMetaData* f : files_[level]) {
4300
+ if (f->epoch_number == kUnknownEpochNumber) {
4301
+ return true;
4302
+ }
4303
+ }
4304
+ }
4305
+ return false;
4306
+ }
4307
+
4308
+ uint64_t VersionStorageInfo::GetMaxEpochNumberOfFiles() const {
4309
+ uint64_t max_epoch_number = kUnknownEpochNumber;
4310
+ for (int level = 0; level < num_levels_; ++level) {
4311
+ for (const FileMetaData* f : files_[level]) {
4312
+ max_epoch_number = std::max(max_epoch_number, f->epoch_number);
4313
+ }
4314
+ }
4315
+ return max_epoch_number;
4316
+ }
4317
+
4318
+ void VersionStorageInfo::RecoverEpochNumbers(ColumnFamilyData* cfd) {
4319
+ cfd->ResetNextEpochNumber();
4320
+
4321
+ bool reserve_epoch_num_for_file_ingested_behind =
4322
+ cfd->ioptions()->allow_ingest_behind;
4323
+ if (reserve_epoch_num_for_file_ingested_behind) {
4324
+ uint64_t reserved_epoch_number = cfd->NewEpochNumber();
4325
+ assert(reserved_epoch_number == kReservedEpochNumberForFileIngestedBehind);
4326
+ ROCKS_LOG_INFO(cfd->ioptions()->info_log.get(),
4327
+ "[%s]CF has reserved epoch number %" PRIu64
4328
+ " for files ingested "
4329
+ "behind since `Options::allow_ingest_behind` is true",
4330
+ cfd->GetName().c_str(), reserved_epoch_number);
4331
+ }
4332
+
4333
+ if (HasMissingEpochNumber()) {
4334
+ assert(epoch_number_requirement_ == EpochNumberRequirement::kMightMissing);
4335
+ assert(num_levels_ >= 1);
4336
+
4337
+ for (int level = num_levels_ - 1; level >= 1; --level) {
4338
+ auto& files_at_level = files_[level];
4339
+ if (files_at_level.empty()) {
4340
+ continue;
4341
+ }
4342
+ uint64_t next_epoch_number = cfd->NewEpochNumber();
4343
+ for (FileMetaData* f : files_at_level) {
4344
+ f->epoch_number = next_epoch_number;
4345
+ }
4346
+ }
4347
+
4348
+ for (auto file_meta_iter = files_[0].rbegin();
4349
+ file_meta_iter != files_[0].rend(); file_meta_iter++) {
4350
+ FileMetaData* f = *file_meta_iter;
4351
+ f->epoch_number = cfd->NewEpochNumber();
4352
+ }
4353
+
4354
+ ROCKS_LOG_WARN(cfd->ioptions()->info_log.get(),
4355
+ "[%s]CF's epoch numbers are inferred based on seqno",
4356
+ cfd->GetName().c_str());
4357
+ epoch_number_requirement_ = EpochNumberRequirement::kMustPresent;
4358
+ } else {
4359
+ assert(epoch_number_requirement_ == EpochNumberRequirement::kMustPresent);
4360
+ cfd->SetNextEpochNumber(
4361
+ std::max(GetMaxEpochNumberOfFiles() + 1, cfd->GetNextEpochNumber()));
4362
+ }
4363
+ }
4364
+
4283
4365
  uint64_t VersionStorageInfo::MaxNextLevelOverlappingBytes() {
4284
4366
  uint64_t result = 0;
4285
4367
  std::vector<FileMetaData*> overlaps;
@@ -4977,10 +5059,15 @@ Status VersionSet::ProcessManifestWrites(
4977
5059
  if (!descriptor_log_ ||
4978
5060
  manifest_file_size_ > db_options_->max_manifest_file_size) {
4979
5061
  TEST_SYNC_POINT("VersionSet::ProcessManifestWrites:BeforeNewManifest");
5062
+ TEST_SYNC_POINT_CALLBACK(
5063
+ "VersionSet::ProcessManifestWrites:BeforeNewManifest", nullptr);
4980
5064
  new_descriptor_log = true;
4981
5065
  } else {
4982
5066
  pending_manifest_file_number_ = manifest_file_number_;
4983
5067
  }
5068
+ TEST_SYNC_POINT_CALLBACK(
5069
+ "VersionSet::ProcessManifestWrites:PostDecidingCreateNewManifestOrNot",
5070
+ &new_descriptor_log);
4984
5071
 
4985
5072
  // Local cached copy of state variable(s). WriteCurrentStateToManifest()
4986
5073
  // reads its content after releasing db mutex to avoid race with
@@ -5109,6 +5196,7 @@ Status VersionSet::ProcessManifestWrites(
5109
5196
  break;
5110
5197
  }
5111
5198
  }
5199
+
5112
5200
  if (s.ok()) {
5113
5201
  io_s = SyncManifest(db_options_, descriptor_log_->file());
5114
5202
  manifest_io_status = io_s;
@@ -5516,7 +5604,8 @@ Status VersionSet::GetCurrentManifestPath(const std::string& dbname,
5516
5604
  Status VersionSet::Recover(
5517
5605
  const std::vector<ColumnFamilyDescriptor>& column_families, bool read_only,
5518
5606
  std::string* db_id, bool no_error_if_files_missing) {
5519
- // Read "CURRENT" file, which contains a pointer to the current manifest file
5607
+ // Read "CURRENT" file, which contains a pointer to the current manifest
5608
+ // file
5520
5609
  std::string manifest_path;
5521
5610
  Status s = GetCurrentManifestPath(dbname_, fs_.get(), &manifest_path,
5522
5611
  &manifest_file_number_);
@@ -5550,7 +5639,8 @@ Status VersionSet::Recover(
5550
5639
  true /* checksum */, 0 /* log_number */);
5551
5640
  VersionEditHandler handler(
5552
5641
  read_only, column_families, const_cast<VersionSet*>(this),
5553
- /*track_missing_files=*/false, no_error_if_files_missing, io_tracer_);
5642
+ /*track_missing_files=*/false, no_error_if_files_missing, io_tracer_,
5643
+ EpochNumberRequirement::kMightMissing);
5554
5644
  handler.Iterate(reader, &log_read_status);
5555
5645
  s = handler.status();
5556
5646
  if (s.ok()) {
@@ -5559,6 +5649,9 @@ Status VersionSet::Recover(
5559
5649
  assert(current_manifest_file_size != 0);
5560
5650
  handler.GetDbId(db_id);
5561
5651
  }
5652
+ if (s.ok()) {
5653
+ RecoverEpochNumbers();
5654
+ }
5562
5655
  }
5563
5656
 
5564
5657
  if (s.ok()) {
@@ -5718,7 +5811,8 @@ Status VersionSet::TryRecoverFromOneManifest(
5718
5811
  log::Reader reader(nullptr, std::move(manifest_file_reader), &reporter,
5719
5812
  /*checksum=*/true, /*log_num=*/0);
5720
5813
  VersionEditHandlerPointInTime handler_pit(
5721
- read_only, column_families, const_cast<VersionSet*>(this), io_tracer_);
5814
+ read_only, column_families, const_cast<VersionSet*>(this), io_tracer_,
5815
+ EpochNumberRequirement::kMightMissing);
5722
5816
 
5723
5817
  handler_pit.Iterate(reader, &s);
5724
5818
 
@@ -5727,7 +5821,21 @@ Status VersionSet::TryRecoverFromOneManifest(
5727
5821
  assert(nullptr != has_missing_table_file);
5728
5822
  *has_missing_table_file = handler_pit.HasMissingFiles();
5729
5823
 
5730
- return handler_pit.status();
5824
+ s = handler_pit.status();
5825
+ if (s.ok()) {
5826
+ RecoverEpochNumbers();
5827
+ }
5828
+ return s;
5829
+ }
5830
+
5831
+ void VersionSet::RecoverEpochNumbers() {
5832
+ for (auto cfd : *column_family_set_) {
5833
+ if (cfd->IsDropped()) {
5834
+ continue;
5835
+ }
5836
+ assert(cfd->initialized());
5837
+ cfd->RecoverEpochNumbers();
5838
+ }
5731
5839
  }
5732
5840
 
5733
5841
  Status VersionSet::ListColumnFamilies(std::vector<std::string>* column_families,
@@ -6047,6 +6155,22 @@ Status VersionSet::WriteCurrentStateToManifest(
6047
6155
  }
6048
6156
  }
6049
6157
 
6158
+ // New manifest should rollover the WAL deletion record from previous
6159
+ // manifest. Otherwise, when an addition record of a deleted WAL gets added to
6160
+ // this new manifest later (which can happens in e.g, SyncWAL()), this new
6161
+ // manifest creates an illusion that such WAL hasn't been deleted.
6162
+ VersionEdit wal_deletions;
6163
+ wal_deletions.DeleteWalsBefore(min_log_number_to_keep());
6164
+ std::string wal_deletions_record;
6165
+ if (!wal_deletions.EncodeTo(&wal_deletions_record)) {
6166
+ return Status::Corruption("Unable to Encode VersionEdit: " +
6167
+ wal_deletions.DebugString(true));
6168
+ }
6169
+ io_s = log->AddRecord(wal_deletions_record);
6170
+ if (!io_s.ok()) {
6171
+ return io_s;
6172
+ }
6173
+
6050
6174
  for (auto cfd : *column_family_set_) {
6051
6175
  assert(cfd);
6052
6176
 
@@ -6098,8 +6222,9 @@ Status VersionSet::WriteCurrentStateToManifest(
6098
6222
  f->fd.smallest_seqno, f->fd.largest_seqno,
6099
6223
  f->marked_for_compaction, f->temperature,
6100
6224
  f->oldest_blob_file_number, f->oldest_ancester_time,
6101
- f->file_creation_time, f->file_checksum,
6102
- f->file_checksum_func_name, f->unique_id);
6225
+ f->file_creation_time, f->epoch_number, f->file_checksum,
6226
+ f->file_checksum_func_name, f->unique_id,
6227
+ f->compensated_range_deletion_size);
6103
6228
  }
6104
6229
  }
6105
6230
 
@@ -6177,8 +6302,9 @@ uint64_t VersionSet::ApproximateSize(const SizeApproximationOptions& options,
6177
6302
  const int num_non_empty_levels = vstorage->num_non_empty_levels();
6178
6303
  end_level = (end_level == -1) ? num_non_empty_levels
6179
6304
  : std::min(end_level, num_non_empty_levels);
6180
-
6181
- assert(start_level <= end_level);
6305
+ if (end_level <= start_level) {
6306
+ return 0;
6307
+ }
6182
6308
 
6183
6309
  // Outline of the optimization that uses options.files_size_error_margin.
6184
6310
  // When approximating the files total size that is used to store a keys range,
@@ -6589,6 +6715,7 @@ void VersionSet::GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata) {
6589
6715
  filemetadata.temperature = file->temperature;
6590
6716
  filemetadata.oldest_ancester_time = file->TryGetOldestAncesterTime();
6591
6717
  filemetadata.file_creation_time = file->TryGetFileCreationTime();
6718
+ filemetadata.epoch_number = file->epoch_number;
6592
6719
  metadata->push_back(filemetadata);
6593
6720
  }
6594
6721
  }
@@ -6744,16 +6871,16 @@ Status VersionSet::VerifyFileMetadata(ColumnFamilyData* cfd,
6744
6871
 
6745
6872
  InternalStats* internal_stats = cfd->internal_stats();
6746
6873
 
6874
+ TableCache::TypedHandle* handle = nullptr;
6747
6875
  FileMetaData meta_copy = meta;
6748
6876
  status = table_cache->FindTable(
6749
- ReadOptions(), file_opts, *icmp, meta_copy,
6750
- &(meta_copy.table_reader_handle), pe,
6877
+ ReadOptions(), file_opts, *icmp, meta_copy, &handle, pe,
6751
6878
  /*no_io=*/false, /*record_read_stats=*/true,
6752
6879
  internal_stats->GetFileReadHist(level), false, level,
6753
6880
  /*prefetch_index_and_filter_in_cache*/ false, max_sz_for_l0_meta_pin,
6754
6881
  meta_copy.temperature);
6755
- if (meta_copy.table_reader_handle) {
6756
- table_cache->ReleaseHandle(meta_copy.table_reader_handle);
6882
+ if (handle) {
6883
+ table_cache->get_cache().Release(handle);
6757
6884
  }
6758
6885
  }
6759
6886
  return status;
@@ -6791,12 +6918,17 @@ Status ReactiveVersionSet::Recover(
6791
6918
  log::Reader* reader = manifest_reader->get();
6792
6919
  assert(reader);
6793
6920
 
6794
- manifest_tailer_.reset(new ManifestTailer(
6795
- column_families, const_cast<ReactiveVersionSet*>(this), io_tracer_));
6921
+ manifest_tailer_.reset(
6922
+ new ManifestTailer(column_families, const_cast<ReactiveVersionSet*>(this),
6923
+ io_tracer_, EpochNumberRequirement::kMightMissing));
6796
6924
 
6797
6925
  manifest_tailer_->Iterate(*reader, manifest_reader_status->get());
6798
6926
 
6799
- return manifest_tailer_->status();
6927
+ s = manifest_tailer_->status();
6928
+ if (s.ok()) {
6929
+ RecoverEpochNumbers();
6930
+ }
6931
+ return s;
6800
6932
  }
6801
6933
 
6802
6934
  Status ReactiveVersionSet::ReadAndApply(
@@ -116,6 +116,10 @@ extern bool SomeFileOverlapsRange(const InternalKeyComparator& icmp,
116
116
  extern void DoGenerateLevelFilesBrief(LevelFilesBrief* file_level,
117
117
  const std::vector<FileMetaData*>& files,
118
118
  Arena* arena);
119
+ enum EpochNumberRequirement {
120
+ kMightMissing,
121
+ kMustPresent,
122
+ };
119
123
 
120
124
  // Information of the storage associated with each Version, including number of
121
125
  // levels of LSM tree, files information at each level, files marked for
@@ -126,7 +130,9 @@ class VersionStorageInfo {
126
130
  const Comparator* user_comparator, int num_levels,
127
131
  CompactionStyle compaction_style,
128
132
  VersionStorageInfo* src_vstorage,
129
- bool _force_consistency_checks);
133
+ bool _force_consistency_checks,
134
+ EpochNumberRequirement epoch_number_requirement =
135
+ EpochNumberRequirement::kMustPresent);
130
136
  // No copying allowed
131
137
  VersionStorageInfo(const VersionStorageInfo&) = delete;
132
138
  void operator=(const VersionStorageInfo&) = delete;
@@ -319,6 +325,17 @@ class VersionStorageInfo {
319
325
  return files_[level];
320
326
  }
321
327
 
328
+ bool HasMissingEpochNumber() const;
329
+ uint64_t GetMaxEpochNumberOfFiles() const;
330
+ EpochNumberRequirement GetEpochNumberRequirement() const {
331
+ return epoch_number_requirement_;
332
+ }
333
+ void SetEpochNumberRequirement(
334
+ EpochNumberRequirement epoch_number_requirement) {
335
+ epoch_number_requirement_ = epoch_number_requirement;
336
+ }
337
+ void RecoverEpochNumbers(ColumnFamilyData* cfd);
338
+
322
339
  class FileLocation {
323
340
  public:
324
341
  FileLocation() = default;
@@ -440,6 +457,11 @@ class VersionStorageInfo {
440
457
  return files_marked_for_compaction_;
441
458
  }
442
459
 
460
+ void TEST_AddFileMarkedForCompaction(int level, FileMetaData* f) {
461
+ f->marked_for_compaction = true;
462
+ files_marked_for_compaction_.emplace_back(level, f);
463
+ }
464
+
443
465
  // REQUIRES: ComputeCompactionScore has been called
444
466
  // REQUIRES: DB mutex held during access
445
467
  const autovector<std::pair<int, FileMetaData*>>& ExpiredTtlFiles() const {
@@ -723,6 +745,8 @@ class VersionStorageInfo {
723
745
  // is compiled in release mode
724
746
  bool force_consistency_checks_;
725
747
 
748
+ EpochNumberRequirement epoch_number_requirement_;
749
+
726
750
  friend class Version;
727
751
  friend class VersionSet;
728
752
  };
@@ -998,7 +1022,7 @@ class Version {
998
1022
  int hit_file_level, bool skip_filters, bool skip_range_deletions,
999
1023
  FdWithKeyRange* f,
1000
1024
  std::unordered_map<uint64_t, BlobReadContexts>& blob_ctxs,
1001
- Cache::Handle* table_handle, uint64_t& num_filter_read,
1025
+ TableCache::TypedHandle* table_handle, uint64_t& num_filter_read,
1002
1026
  uint64_t& num_index_read, uint64_t& num_sst_read);
1003
1027
 
1004
1028
  #ifdef USE_COROUTINES
@@ -1047,7 +1071,9 @@ class Version {
1047
1071
  Version(ColumnFamilyData* cfd, VersionSet* vset, const FileOptions& file_opt,
1048
1072
  MutableCFOptions mutable_cf_options,
1049
1073
  const std::shared_ptr<IOTracer>& io_tracer,
1050
- uint64_t version_number = 0);
1074
+ uint64_t version_number = 0,
1075
+ EpochNumberRequirement epoch_number_requirement =
1076
+ EpochNumberRequirement::kMustPresent);
1051
1077
 
1052
1078
  ~Version();
1053
1079
 
@@ -1188,6 +1214,10 @@ class VersionSet {
1188
1214
  const std::vector<ColumnFamilyDescriptor>& column_families,
1189
1215
  bool read_only, std::string* db_id, bool* has_missing_table_file);
1190
1216
 
1217
+ // Recover the next epoch number of each CFs and epoch number
1218
+ // of their files (if missing)
1219
+ void RecoverEpochNumbers();
1220
+
1191
1221
  // Reads a manifest file and returns a list of column families in
1192
1222
  // column_families.
1193
1223
  static Status ListColumnFamilies(std::vector<std::string>* column_families,
@@ -1401,7 +1431,7 @@ class VersionSet {
1401
1431
  void AddObsoleteBlobFile(uint64_t blob_file_number, std::string path) {
1402
1432
  assert(table_cache_);
1403
1433
 
1404
- table_cache_->Erase(GetSlice(&blob_file_number));
1434
+ table_cache_->Erase(GetSliceForKey(&blob_file_number));
1405
1435
 
1406
1436
  obsolete_blob_files_.emplace_back(blob_file_number, std::move(path));
1407
1437
  }
@@ -16,7 +16,7 @@ DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST)
16
16
  (const ReadOptions& read_options, MultiGetRange file_range, int hit_file_level,
17
17
  bool skip_filters, bool skip_range_deletions, FdWithKeyRange* f,
18
18
  std::unordered_map<uint64_t, BlobReadContexts>& blob_ctxs,
19
- Cache::Handle* table_handle, uint64_t& num_filter_read,
19
+ TableCache::TypedHandle* table_handle, uint64_t& num_filter_read,
20
20
  uint64_t& num_index_read, uint64_t& num_sst_read) {
21
21
  bool timer_enabled = GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex &&
22
22
  get_perf_context()->per_level_perf_context_enabled;