@nxtedition/rocksdb 8.1.4 → 8.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223)
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +21 -0
  2. package/deps/rocksdb/rocksdb/Makefile +15 -3
  3. package/deps/rocksdb/rocksdb/TARGETS +6 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +32 -35
  5. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +0 -30
  6. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.h +0 -83
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +13 -14
  8. package/deps/rocksdb/rocksdb/cache/cache_helpers.cc +40 -0
  9. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +14 -20
  10. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +8 -9
  11. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +5 -4
  12. package/deps/rocksdb/rocksdb/cache/cache_test.cc +124 -156
  13. package/deps/rocksdb/rocksdb/cache/charged_cache.cc +10 -26
  14. package/deps/rocksdb/rocksdb/cache/charged_cache.h +11 -16
  15. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +35 -32
  16. package/deps/rocksdb/rocksdb/cache/clock_cache.h +19 -21
  17. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +42 -30
  18. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +9 -8
  19. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +91 -143
  20. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +54 -60
  21. package/deps/rocksdb/rocksdb/cache/lru_cache.h +37 -63
  22. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +120 -106
  23. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +14 -5
  24. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +16 -31
  25. package/deps/rocksdb/rocksdb/cache/typed_cache.h +339 -0
  26. package/deps/rocksdb/rocksdb/db/blob/blob_contents.cc +0 -48
  27. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +18 -15
  28. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +5 -26
  29. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +7 -8
  30. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +6 -3
  31. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +2 -7
  32. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +19 -47
  33. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +13 -5
  34. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +15 -22
  35. package/deps/rocksdb/rocksdb/db/builder.cc +24 -10
  36. package/deps/rocksdb/rocksdb/db/builder.h +2 -1
  37. package/deps/rocksdb/rocksdb/db/c.cc +15 -0
  38. package/deps/rocksdb/rocksdb/db/c_test.c +3 -0
  39. package/deps/rocksdb/rocksdb/db/column_family.cc +11 -6
  40. package/deps/rocksdb/rocksdb/db/column_family.h +20 -6
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +31 -34
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +3 -0
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +21 -3
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +1 -0
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +4 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +4 -2
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +9 -6
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +275 -82
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -18
  51. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +17 -16
  52. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +19 -6
  53. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +5 -5
  54. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +22 -22
  55. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +5 -5
  56. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +81 -52
  57. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -1
  58. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +5 -5
  59. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +8 -2
  60. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +3 -0
  61. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +266 -138
  62. package/deps/rocksdb/rocksdb/db/corruption_test.cc +86 -1
  63. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +98 -9
  64. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +28 -28
  65. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +2 -3
  66. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +1022 -123
  67. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +65 -4
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +32 -21
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +32 -24
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +199 -77
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +1 -1
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +3 -2
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -0
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +8 -4
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +43 -23
  76. package/deps/rocksdb/rocksdb/db/db_iter.cc +8 -2
  77. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +42 -0
  78. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +155 -0
  79. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +12 -12
  80. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +230 -2
  81. package/deps/rocksdb/rocksdb/db/db_test.cc +3 -0
  82. package/deps/rocksdb/rocksdb/db/db_test2.cc +233 -8
  83. package/deps/rocksdb/rocksdb/db/db_test_util.cc +11 -10
  84. package/deps/rocksdb/rocksdb/db/db_test_util.h +39 -24
  85. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +129 -0
  86. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +28 -0
  87. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +21 -0
  88. package/deps/rocksdb/rocksdb/db/dbformat.cc +25 -0
  89. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -0
  90. package/deps/rocksdb/rocksdb/db/experimental.cc +3 -2
  91. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +3 -0
  92. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +92 -13
  93. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +38 -1
  94. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +14 -110
  95. package/deps/rocksdb/rocksdb/db/flush_job.cc +12 -10
  96. package/deps/rocksdb/rocksdb/db/flush_job.h +3 -2
  97. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +29 -29
  98. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +56 -53
  99. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +3 -4
  100. package/deps/rocksdb/rocksdb/db/internal_stats.cc +11 -11
  101. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -2
  102. package/deps/rocksdb/rocksdb/db/log_reader.cc +8 -6
  103. package/deps/rocksdb/rocksdb/db/log_test.cc +35 -2
  104. package/deps/rocksdb/rocksdb/db/memtable.cc +31 -6
  105. package/deps/rocksdb/rocksdb/db/merge_helper.cc +47 -29
  106. package/deps/rocksdb/rocksdb/db/merge_helper.h +14 -6
  107. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +10 -10
  108. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/repair.cc +65 -22
  110. package/deps/rocksdb/rocksdb/db/repair_test.cc +54 -0
  111. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +26 -26
  112. package/deps/rocksdb/rocksdb/db/table_cache.cc +41 -91
  113. package/deps/rocksdb/rocksdb/db/table_cache.h +17 -19
  114. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -9
  115. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -1
  116. package/deps/rocksdb/rocksdb/db/version_builder.cc +102 -52
  117. package/deps/rocksdb/rocksdb/db/version_builder.h +20 -0
  118. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +218 -93
  119. package/deps/rocksdb/rocksdb/db/version_edit.cc +27 -1
  120. package/deps/rocksdb/rocksdb/db/version_edit.h +34 -9
  121. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +13 -6
  122. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +17 -6
  123. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +19 -17
  124. package/deps/rocksdb/rocksdb/db/version_set.cc +160 -28
  125. package/deps/rocksdb/rocksdb/db/version_set.h +34 -4
  126. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -1
  127. package/deps/rocksdb/rocksdb/db/version_set_test.cc +65 -31
  128. package/deps/rocksdb/rocksdb/db/write_batch.cc +4 -1
  129. package/deps/rocksdb/rocksdb/db/write_thread.cc +5 -2
  130. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -0
  131. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -32
  132. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.h +2 -1
  133. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +8 -6
  134. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
  135. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +11 -4
  136. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +16 -15
  137. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +13 -1
  138. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -0
  139. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +286 -217
  140. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +8 -0
  141. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +137 -135
  142. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
  143. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +7 -1
  144. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +21 -0
  145. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +9 -3
  146. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +2 -1
  147. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +8 -6
  148. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +3 -0
  149. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +69 -9
  150. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  151. package/deps/rocksdb/rocksdb/memory/arena.cc +23 -87
  152. package/deps/rocksdb/rocksdb/memory/arena.h +25 -31
  153. package/deps/rocksdb/rocksdb/memory/arena_test.cc +90 -0
  154. package/deps/rocksdb/rocksdb/memory/memory_allocator.h +9 -0
  155. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +26 -26
  156. package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -3
  157. package/deps/rocksdb/rocksdb/port/mmap.cc +98 -0
  158. package/deps/rocksdb/rocksdb/port/mmap.h +70 -0
  159. package/deps/rocksdb/rocksdb/port/port_posix.h +2 -0
  160. package/{prebuilds → deps/rocksdb/rocksdb/prebuilds}/linux-x64/node.napi.node +0 -0
  161. package/deps/rocksdb/rocksdb/src.mk +3 -0
  162. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -2
  163. package/deps/rocksdb/rocksdb/table/block_based/block.h +3 -0
  164. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +25 -67
  165. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +3 -3
  166. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +18 -13
  167. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +159 -225
  168. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +31 -50
  169. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +52 -20
  170. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +3 -3
  171. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +1 -1
  172. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +96 -0
  173. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +132 -0
  174. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +28 -0
  175. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -5
  176. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +1 -4
  177. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +6 -7
  178. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +3 -1
  179. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +6 -1
  180. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +19 -18
  181. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +9 -5
  182. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +3 -1
  183. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +2 -1
  184. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +2 -2
  185. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -3
  186. package/deps/rocksdb/rocksdb/table/format.cc +24 -20
  187. package/deps/rocksdb/rocksdb/table/format.h +6 -3
  188. package/deps/rocksdb/rocksdb/table/get_context.cc +12 -3
  189. package/deps/rocksdb/rocksdb/table/internal_iterator.h +0 -2
  190. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +69 -35
  191. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +2 -2
  192. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
  193. package/deps/rocksdb/rocksdb/table/table_test.cc +7 -6
  194. package/deps/rocksdb/rocksdb/test_util/testutil.h +10 -0
  195. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +66 -1
  196. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +9 -2
  197. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +5 -0
  198. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +2 -2
  199. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +1 -1
  200. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +20 -12
  201. package/deps/rocksdb/rocksdb/util/bloom_test.cc +1 -1
  202. package/deps/rocksdb/rocksdb/util/compression.cc +2 -2
  203. package/deps/rocksdb/rocksdb/util/compression.h +11 -2
  204. package/deps/rocksdb/rocksdb/util/status.cc +7 -0
  205. package/deps/rocksdb/rocksdb/util/xxhash.h +1901 -887
  206. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +250 -74
  207. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +199 -4
  208. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +35 -57
  209. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +4 -5
  210. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -0
  211. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +39 -0
  212. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +9 -0
  213. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +11 -6
  214. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +6 -5
  215. package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +0 -1
  216. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +10 -11
  217. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +31 -31
  218. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +4 -0
  219. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +111 -0
  220. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +1 -0
  221. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +12 -3
  222. package/package.json +1 -1
  223. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +0 -182
@@ -1515,7 +1515,7 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1515
1515
  .PermitUncheckedError(); // ignore error
1516
1516
  const uint64_t current_time = static_cast<uint64_t>(_current_time);
1517
1517
  meta.oldest_ancester_time = current_time;
1518
-
1518
+ meta.epoch_number = cfd->NewEpochNumber();
1519
1519
  {
1520
1520
  auto write_hint = cfd->CalculateSSTWriteHint(0);
1521
1521
  mutex_.Unlock();
@@ -1550,6 +1550,8 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1550
1550
  0 /* file_creation_time */, db_id_, db_session_id_,
1551
1551
  0 /* target_file_size */, meta.fd.GetNumber());
1552
1552
  SeqnoToTimeMapping empty_seqno_time_mapping;
1553
+ Version* version = cfd->current();
1554
+ version->Ref();
1553
1555
  s = BuildTable(
1554
1556
  dbname_, versions_.get(), immutable_db_options_, tboptions,
1555
1557
  file_options_for_compaction_, cfd->table_cache(), iter.get(),
@@ -1559,7 +1561,8 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1559
1561
  io_tracer_, BlobFileCreationReason::kRecovery,
1560
1562
  empty_seqno_time_mapping, &event_logger_, job_id, Env::IO_HIGH,
1561
1563
  nullptr /* table_properties */, write_hint,
1562
- nullptr /*full_history_ts_low*/, &blob_callback_);
1564
+ nullptr /*full_history_ts_low*/, &blob_callback_, version);
1565
+ version->Unref();
1563
1566
  LogFlush(immutable_db_options_.info_log);
1564
1567
  ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
1565
1568
  "[%s] [WriteLevel0TableForRecovery]"
@@ -1588,8 +1591,9 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1588
1591
  meta.fd.smallest_seqno, meta.fd.largest_seqno,
1589
1592
  meta.marked_for_compaction, meta.temperature,
1590
1593
  meta.oldest_blob_file_number, meta.oldest_ancester_time,
1591
- meta.file_creation_time, meta.file_checksum,
1592
- meta.file_checksum_func_name, meta.unique_id);
1594
+ meta.file_creation_time, meta.epoch_number,
1595
+ meta.file_checksum, meta.file_checksum_func_name,
1596
+ meta.unique_id, meta.compensated_range_deletion_size);
1593
1597
 
1594
1598
  for (const auto& blob : blob_file_additions) {
1595
1599
  edit->AddBlobFile(blob);
@@ -924,6 +924,16 @@ Status DBImpl::WriteImplWALOnly(
924
924
  write_thread->ExitAsBatchGroupLeader(write_group, status);
925
925
  return status;
926
926
  }
927
+ } else {
928
+ InstrumentedMutexLock lock(&mutex_);
929
+ Status status =
930
+ DelayWrite(/*num_bytes=*/0ull, *write_thread, write_options);
931
+ if (!status.ok()) {
932
+ WriteThread::WriteGroup write_group;
933
+ write_thread->EnterAsBatchGroupLeader(&w, &write_group);
934
+ write_thread->ExitAsBatchGroupLeader(write_group, status);
935
+ return status;
936
+ }
927
937
  }
928
938
 
929
939
  WriteThread::WriteGroup write_group;
@@ -1192,7 +1202,7 @@ Status DBImpl::PreprocessWrite(const WriteOptions& write_options,
1192
1202
  // might happen for smaller writes but larger writes can go through.
1193
1203
  // Can optimize it if it is an issue.
1194
1204
  InstrumentedMutexLock l(&mutex_);
1195
- status = DelayWrite(last_batch_group_size_, write_options);
1205
+ status = DelayWrite(last_batch_group_size_, write_thread_, write_options);
1196
1206
  PERF_TIMER_START(write_pre_and_post_process_time);
1197
1207
  }
1198
1208
 
@@ -1644,14 +1654,14 @@ Status DBImpl::SwitchWAL(WriteContext* write_context) {
1644
1654
  cfd->imm()->FlushRequested();
1645
1655
  if (!immutable_db_options_.atomic_flush) {
1646
1656
  FlushRequest flush_req;
1647
- GenerateFlushRequest({cfd}, &flush_req);
1648
- SchedulePendingFlush(flush_req, FlushReason::kWalFull);
1657
+ GenerateFlushRequest({cfd}, FlushReason::kWalFull, &flush_req);
1658
+ SchedulePendingFlush(flush_req);
1649
1659
  }
1650
1660
  }
1651
1661
  if (immutable_db_options_.atomic_flush) {
1652
1662
  FlushRequest flush_req;
1653
- GenerateFlushRequest(cfds, &flush_req);
1654
- SchedulePendingFlush(flush_req, FlushReason::kWalFull);
1663
+ GenerateFlushRequest(cfds, FlushReason::kWalFull, &flush_req);
1664
+ SchedulePendingFlush(flush_req);
1655
1665
  }
1656
1666
  MaybeScheduleFlushOrCompaction();
1657
1667
  }
@@ -1735,14 +1745,15 @@ Status DBImpl::HandleWriteBufferManagerFlush(WriteContext* write_context) {
1735
1745
  cfd->imm()->FlushRequested();
1736
1746
  if (!immutable_db_options_.atomic_flush) {
1737
1747
  FlushRequest flush_req;
1738
- GenerateFlushRequest({cfd}, &flush_req);
1739
- SchedulePendingFlush(flush_req, FlushReason::kWriteBufferManager);
1748
+ GenerateFlushRequest({cfd}, FlushReason::kWriteBufferManager,
1749
+ &flush_req);
1750
+ SchedulePendingFlush(flush_req);
1740
1751
  }
1741
1752
  }
1742
1753
  if (immutable_db_options_.atomic_flush) {
1743
1754
  FlushRequest flush_req;
1744
- GenerateFlushRequest(cfds, &flush_req);
1745
- SchedulePendingFlush(flush_req, FlushReason::kWriteBufferManager);
1755
+ GenerateFlushRequest(cfds, FlushReason::kWriteBufferManager, &flush_req);
1756
+ SchedulePendingFlush(flush_req);
1746
1757
  }
1747
1758
  MaybeScheduleFlushOrCompaction();
1748
1759
  }
@@ -1759,16 +1770,25 @@ uint64_t DBImpl::GetMaxTotalWalSize() const {
1759
1770
  }
1760
1771
 
1761
1772
  // REQUIRES: mutex_ is held
1762
- // REQUIRES: this thread is currently at the front of the writer queue
1763
- Status DBImpl::DelayWrite(uint64_t num_bytes,
1773
+ // REQUIRES: this thread is currently the leader for write_thread
1774
+ Status DBImpl::DelayWrite(uint64_t num_bytes, WriteThread& write_thread,
1764
1775
  const WriteOptions& write_options) {
1776
+ mutex_.AssertHeld();
1765
1777
  uint64_t time_delayed = 0;
1766
1778
  bool delayed = false;
1767
1779
  {
1768
1780
  StopWatch sw(immutable_db_options_.clock, stats_, WRITE_STALL,
1769
1781
  &time_delayed);
1770
- uint64_t delay =
1771
- write_controller_.GetDelay(immutable_db_options_.clock, num_bytes);
1782
+ // To avoid parallel timed delays (bad throttling), only support them
1783
+ // on the primary write queue.
1784
+ uint64_t delay;
1785
+ if (&write_thread == &write_thread_) {
1786
+ delay =
1787
+ write_controller_.GetDelay(immutable_db_options_.clock, num_bytes);
1788
+ } else {
1789
+ assert(num_bytes == 0);
1790
+ delay = 0;
1791
+ }
1772
1792
  TEST_SYNC_POINT("DBImpl::DelayWrite:Start");
1773
1793
  if (delay > 0) {
1774
1794
  if (write_options.no_slowdown) {
@@ -1776,9 +1796,9 @@ Status DBImpl::DelayWrite(uint64_t num_bytes,
1776
1796
  }
1777
1797
  TEST_SYNC_POINT("DBImpl::DelayWrite:Sleep");
1778
1798
 
1779
- // Notify write_thread_ about the stall so it can setup a barrier and
1799
+ // Notify write_thread about the stall so it can setup a barrier and
1780
1800
  // fail any pending writers with no_slowdown
1781
- write_thread_.BeginWriteStall();
1801
+ write_thread.BeginWriteStall();
1782
1802
  mutex_.Unlock();
1783
1803
  TEST_SYNC_POINT("DBImpl::DelayWrite:BeginWriteStallDone");
1784
1804
  // We will delay the write until we have slept for `delay` microseconds
@@ -1798,7 +1818,7 @@ Status DBImpl::DelayWrite(uint64_t num_bytes,
1798
1818
  immutable_db_options_.clock->SleepForMicroseconds(kDelayInterval);
1799
1819
  }
1800
1820
  mutex_.Lock();
1801
- write_thread_.EndWriteStall();
1821
+ write_thread.EndWriteStall();
1802
1822
  }
1803
1823
 
1804
1824
  // Don't wait if there's a background error, even if it's a soft error. We
@@ -1812,12 +1832,12 @@ Status DBImpl::DelayWrite(uint64_t num_bytes,
1812
1832
  }
1813
1833
  delayed = true;
1814
1834
 
1815
- // Notify write_thread_ about the stall so it can setup a barrier and
1835
+ // Notify write_thread about the stall so it can setup a barrier and
1816
1836
  // fail any pending writers with no_slowdown
1817
- write_thread_.BeginWriteStall();
1837
+ write_thread.BeginWriteStall();
1818
1838
  TEST_SYNC_POINT("DBImpl::DelayWrite:Wait");
1819
1839
  bg_cv_.Wait();
1820
- write_thread_.EndWriteStall();
1840
+ write_thread.EndWriteStall();
1821
1841
  }
1822
1842
  }
1823
1843
  assert(!delayed || !write_options.no_slowdown);
@@ -1998,13 +2018,13 @@ Status DBImpl::ScheduleFlushes(WriteContext* context) {
1998
2018
  if (immutable_db_options_.atomic_flush) {
1999
2019
  AssignAtomicFlushSeq(cfds);
2000
2020
  FlushRequest flush_req;
2001
- GenerateFlushRequest(cfds, &flush_req);
2002
- SchedulePendingFlush(flush_req, FlushReason::kWriteBufferFull);
2021
+ GenerateFlushRequest(cfds, FlushReason::kWriteBufferFull, &flush_req);
2022
+ SchedulePendingFlush(flush_req);
2003
2023
  } else {
2004
2024
  for (auto* cfd : cfds) {
2005
2025
  FlushRequest flush_req;
2006
- GenerateFlushRequest({cfd}, &flush_req);
2007
- SchedulePendingFlush(flush_req, FlushReason::kWriteBufferFull);
2026
+ GenerateFlushRequest({cfd}, FlushReason::kWriteBufferFull, &flush_req);
2027
+ SchedulePendingFlush(flush_req);
2008
2028
  }
2009
2029
  }
2010
2030
  MaybeScheduleFlushOrCompaction();
@@ -1247,10 +1247,13 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
1247
1247
  }
1248
1248
 
1249
1249
  bool DBIter::Merge(const Slice* val, const Slice& user_key) {
1250
+ // `op_failure_scope` (an output parameter) is not provided (set to nullptr)
1251
+ // since a failure must be propagated regardless of its value.
1250
1252
  Status s = MergeHelper::TimedFullMerge(
1251
1253
  merge_operator_, user_key, val, merge_context_.GetOperands(),
1252
1254
  &saved_value_, logger_, statistics_, clock_, &pinned_value_,
1253
- /* update_num_ops_stats */ true);
1255
+ /* update_num_ops_stats */ true,
1256
+ /* op_failure_scope */ nullptr);
1254
1257
  if (!s.ok()) {
1255
1258
  valid_ = false;
1256
1259
  status_ = s;
@@ -1265,10 +1268,13 @@ bool DBIter::Merge(const Slice* val, const Slice& user_key) {
1265
1268
  }
1266
1269
 
1267
1270
  bool DBIter::MergeEntity(const Slice& entity, const Slice& user_key) {
1271
+ // `op_failure_scope` (an output parameter) is not provided (set to nullptr)
1272
+ // since a failure must be propagated regardless of its value.
1268
1273
  Status s = MergeHelper::TimedFullMergeWithEntity(
1269
1274
  merge_operator_, user_key, entity, merge_context_.GetOperands(),
1270
1275
  &saved_value_, logger_, statistics_, clock_,
1271
- /* update_num_ops_stats */ true);
1276
+ /* update_num_ops_stats */ true,
1277
+ /* op_failure_scope */ nullptr);
1272
1278
  if (!s.ok()) {
1273
1279
  valid_ = false;
1274
1280
  status_ = s;
@@ -439,6 +439,48 @@ TEST_F(DBMergeOperandTest, GetMergeOperandsLargeResultOptimization) {
439
439
  }
440
440
  }
441
441
 
442
+ TEST_F(DBMergeOperandTest, GetMergeOperandsBaseDeletionInImmMem) {
443
+ // In this test, "k1" has a MERGE in a mutable memtable on top of a base
444
+ // DELETE in an immutable memtable.
445
+ Options opts = CurrentOptions();
446
+ opts.max_write_buffer_number = 10;
447
+ opts.min_write_buffer_number_to_merge = 10;
448
+ opts.merge_operator = MergeOperators::CreateDeprecatedPutOperator();
449
+ Reopen(opts);
450
+
451
+ ASSERT_OK(Put("k1", "val"));
452
+ ASSERT_OK(Flush());
453
+
454
+ ASSERT_OK(Put("k0", "val"));
455
+ ASSERT_OK(Delete("k1"));
456
+ ASSERT_OK(Put("k2", "val"));
457
+ ASSERT_OK(dbfull()->TEST_SwitchMemtable());
458
+ ASSERT_OK(Merge("k1", "val"));
459
+
460
+ {
461
+ std::vector<PinnableSlice> values(2);
462
+
463
+ GetMergeOperandsOptions merge_operands_info;
464
+ merge_operands_info.expected_max_number_of_operands =
465
+ static_cast<int>(values.size());
466
+
467
+ std::string key = "k1", from_db;
468
+ int number_of_operands = 0;
469
+ ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(),
470
+ key, values.data(), &merge_operands_info,
471
+ &number_of_operands));
472
+ ASSERT_EQ(1, number_of_operands);
473
+ from_db = values[0].ToString();
474
+ ASSERT_EQ("val", from_db);
475
+ }
476
+
477
+ {
478
+ std::string val;
479
+ ASSERT_OK(db_->Get(ReadOptions(), "k1", &val));
480
+ ASSERT_EQ("val", val);
481
+ }
482
+ }
483
+
442
484
  } // namespace ROCKSDB_NAMESPACE
443
485
 
444
486
  int main(int argc, char** argv) {
@@ -202,6 +202,161 @@ TEST_F(DBMergeOperatorTest, MergeErrorOnIteration) {
202
202
  VerifyDBInternal({{"k1", "v1"}, {"k2", "corrupted"}, {"k2", "v2"}});
203
203
  }
204
204
 
205
+ #ifndef ROCKSDB_LITE
206
+
207
+ TEST_F(DBMergeOperatorTest, MergeOperatorFailsWithMustMerge) {
208
+ // This is like a mini-stress test dedicated to `OpFailureScope::kMustMerge`.
209
+ // Some or most of it might be deleted upon adding that option to the actual
210
+ // stress test.
211
+ //
212
+ // "k0" and "k2" are stable (uncorrupted) keys before and after a corrupted
213
+ // key ("k1"). The outer loop (`i`) varies which write (`j`) to "k1" triggers
214
+ // the corruption. Inside that loop there are three cases:
215
+ //
216
+ // - Case 1: pure `Merge()`s
217
+ // - Case 2: `Merge()`s on top of a `Put()`
218
+ // - Case 3: `Merge()`s on top of a `Delete()`
219
+ //
220
+ // For each case we test query results before flush, after flush, and after
221
+ // compaction, as well as cleanup after deletion+compaction. The queries
222
+ // expect "k0" and "k2" to always be readable. "k1" is expected to be readable
223
+ // only by APIs that do not require merging, such as `GetMergeOperands()`.
224
+ const int kNumOperands = 3;
225
+ Options options;
226
+ options.merge_operator.reset(new TestPutOperator());
227
+ options.env = env_;
228
+ Reopen(options);
229
+
230
+ for (int i = 0; i < kNumOperands; ++i) {
231
+ auto check_query = [&]() {
232
+ {
233
+ std::string value;
234
+ ASSERT_OK(db_->Get(ReadOptions(), "k0", &value));
235
+ ASSERT_TRUE(db_->Get(ReadOptions(), "k1", &value).IsCorruption());
236
+ ASSERT_OK(db_->Get(ReadOptions(), "k2", &value));
237
+ }
238
+
239
+ {
240
+ std::unique_ptr<Iterator> iter;
241
+ iter.reset(db_->NewIterator(ReadOptions()));
242
+ iter->SeekToFirst();
243
+ ASSERT_TRUE(iter->Valid());
244
+ ASSERT_EQ("k0", iter->key());
245
+ iter->Next();
246
+ ASSERT_TRUE(iter->status().IsCorruption());
247
+
248
+ iter->SeekToLast();
249
+ ASSERT_TRUE(iter->Valid());
250
+ ASSERT_EQ("k2", iter->key());
251
+ iter->Prev();
252
+ ASSERT_TRUE(iter->status().IsCorruption());
253
+
254
+ iter->Seek("k2");
255
+ ASSERT_TRUE(iter->Valid());
256
+ ASSERT_EQ("k2", iter->key());
257
+ }
258
+
259
+ std::vector<PinnableSlice> values(kNumOperands);
260
+ GetMergeOperandsOptions merge_operands_info;
261
+ merge_operands_info.expected_max_number_of_operands = kNumOperands;
262
+ int num_operands_found = 0;
263
+ ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(),
264
+ "k1", values.data(), &merge_operands_info,
265
+ &num_operands_found));
266
+ ASSERT_EQ(kNumOperands, num_operands_found);
267
+ for (int j = 0; j < num_operands_found; ++j) {
268
+ if (i == j) {
269
+ ASSERT_EQ(values[j], "corrupted_must_merge");
270
+ } else {
271
+ ASSERT_EQ(values[j], "ok");
272
+ }
273
+ }
274
+ };
275
+
276
+ ASSERT_OK(Put("k0", "val"));
277
+ ASSERT_OK(Put("k2", "val"));
278
+
279
+ // Case 1
280
+ for (int j = 0; j < kNumOperands; ++j) {
281
+ if (j == i) {
282
+ ASSERT_OK(Merge("k1", "corrupted_must_merge"));
283
+ } else {
284
+ ASSERT_OK(Merge("k1", "ok"));
285
+ }
286
+ }
287
+ check_query();
288
+ ASSERT_OK(Flush());
289
+ check_query();
290
+ {
291
+ CompactRangeOptions cro;
292
+ cro.bottommost_level_compaction =
293
+ BottommostLevelCompaction::kForceOptimized;
294
+ ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
295
+ }
296
+ check_query();
297
+
298
+ // Case 2
299
+ for (int j = 0; j < kNumOperands; ++j) {
300
+ Slice val;
301
+ if (j == i) {
302
+ val = "corrupted_must_merge";
303
+ } else {
304
+ val = "ok";
305
+ }
306
+ if (j == 0) {
307
+ ASSERT_OK(Put("k1", val));
308
+ } else {
309
+ ASSERT_OK(Merge("k1", val));
310
+ }
311
+ }
312
+ check_query();
313
+ ASSERT_OK(Flush());
314
+ check_query();
315
+ {
316
+ CompactRangeOptions cro;
317
+ cro.bottommost_level_compaction =
318
+ BottommostLevelCompaction::kForceOptimized;
319
+ ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
320
+ }
321
+ check_query();
322
+
323
+ // Case 3
324
+ ASSERT_OK(Delete("k1"));
325
+ for (int j = 0; j < kNumOperands; ++j) {
326
+ if (i == j) {
327
+ ASSERT_OK(Merge("k1", "corrupted_must_merge"));
328
+ } else {
329
+ ASSERT_OK(Merge("k1", "ok"));
330
+ }
331
+ }
332
+ check_query();
333
+ ASSERT_OK(Flush());
334
+ check_query();
335
+ {
336
+ CompactRangeOptions cro;
337
+ cro.bottommost_level_compaction =
338
+ BottommostLevelCompaction::kForceOptimized;
339
+ ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
340
+ }
341
+ check_query();
342
+
343
+ // Verify obsolete data removal still happens
344
+ ASSERT_OK(Delete("k0"));
345
+ ASSERT_OK(Delete("k1"));
346
+ ASSERT_OK(Delete("k2"));
347
+ ASSERT_EQ("NOT_FOUND", Get("k0"));
348
+ ASSERT_EQ("NOT_FOUND", Get("k1"));
349
+ ASSERT_EQ("NOT_FOUND", Get("k2"));
350
+ CompactRangeOptions cro;
351
+ cro.bottommost_level_compaction =
352
+ BottommostLevelCompaction::kForceOptimized;
353
+ ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
354
+ ASSERT_EQ("", FilesPerLevel());
355
+ }
356
+ }
357
+
358
+ #endif // ROCKSDB_LITE
359
+
205
360
  class MergeOperatorPinningTest : public DBMergeOperatorTest,
206
361
  public testing::WithParamInterface<bool> {
207
362
  public:
@@ -1848,8 +1848,8 @@ TEST_F(DBPropertiesTest, BlobCacheProperties) {
1848
1848
 
1849
1849
  // Insert unpinned blob to the cache and check size.
1850
1850
  constexpr size_t kSize1 = 70;
1851
- ASSERT_OK(blob_cache->Insert("blob1", nullptr /*value*/, kSize1,
1852
- nullptr /*deleter*/));
1851
+ ASSERT_OK(blob_cache->Insert("blob1", nullptr /*value*/,
1852
+ &kNoopCacheItemHelper, kSize1));
1853
1853
  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheCapacity, &value));
1854
1854
  ASSERT_EQ(kCapacity, value);
1855
1855
  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheUsage, &value));
@@ -1861,8 +1861,8 @@ TEST_F(DBPropertiesTest, BlobCacheProperties) {
1861
1861
  // Insert pinned blob to the cache and check size.
1862
1862
  constexpr size_t kSize2 = 60;
1863
1863
  Cache::Handle* blob2 = nullptr;
1864
- ASSERT_OK(blob_cache->Insert("blob2", nullptr /*value*/, kSize2,
1865
- nullptr /*deleter*/, &blob2));
1864
+ ASSERT_OK(blob_cache->Insert("blob2", nullptr /*value*/,
1865
+ &kNoopCacheItemHelper, kSize2, &blob2));
1866
1866
  ASSERT_NE(nullptr, blob2);
1867
1867
  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheCapacity, &value));
1868
1868
  ASSERT_EQ(kCapacity, value);
@@ -1876,8 +1876,8 @@ TEST_F(DBPropertiesTest, BlobCacheProperties) {
1876
1876
  // Insert another pinned blob to make the cache over-sized.
1877
1877
  constexpr size_t kSize3 = 80;
1878
1878
  Cache::Handle* blob3 = nullptr;
1879
- ASSERT_OK(blob_cache->Insert("blob3", nullptr /*value*/, kSize3,
1880
- nullptr /*deleter*/, &blob3));
1879
+ ASSERT_OK(blob_cache->Insert("blob3", nullptr /*value*/,
1880
+ &kNoopCacheItemHelper, kSize3, &blob3));
1881
1881
  ASSERT_NE(nullptr, blob3);
1882
1882
  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheCapacity, &value));
1883
1883
  ASSERT_EQ(kCapacity, value);
@@ -1956,8 +1956,8 @@ TEST_F(DBPropertiesTest, BlockCacheProperties) {
1956
1956
 
1957
1957
  // Insert unpinned item to the cache and check size.
1958
1958
  constexpr size_t kSize1 = 50;
1959
- ASSERT_OK(block_cache->Insert("item1", nullptr /*value*/, kSize1,
1960
- nullptr /*deleter*/));
1959
+ ASSERT_OK(block_cache->Insert("item1", nullptr /*value*/,
1960
+ &kNoopCacheItemHelper, kSize1));
1961
1961
  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value));
1962
1962
  ASSERT_EQ(kCapacity, value);
1963
1963
  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheUsage, &value));
@@ -1969,8 +1969,8 @@ TEST_F(DBPropertiesTest, BlockCacheProperties) {
1969
1969
  // Insert pinned item to the cache and check size.
1970
1970
  constexpr size_t kSize2 = 30;
1971
1971
  Cache::Handle* item2 = nullptr;
1972
- ASSERT_OK(block_cache->Insert("item2", nullptr /*value*/, kSize2,
1973
- nullptr /*deleter*/, &item2));
1972
+ ASSERT_OK(block_cache->Insert("item2", nullptr /*value*/,
1973
+ &kNoopCacheItemHelper, kSize2, &item2));
1974
1974
  ASSERT_NE(nullptr, item2);
1975
1975
  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value));
1976
1976
  ASSERT_EQ(kCapacity, value);
@@ -1983,8 +1983,8 @@ TEST_F(DBPropertiesTest, BlockCacheProperties) {
1983
1983
  // Insert another pinned item to make the cache over-sized.
1984
1984
  constexpr size_t kSize3 = 80;
1985
1985
  Cache::Handle* item3 = nullptr;
1986
- ASSERT_OK(block_cache->Insert("item3", nullptr /*value*/, kSize3,
1987
- nullptr /*deleter*/, &item3));
1986
+ ASSERT_OK(block_cache->Insert("item3", nullptr /*value*/,
1987
+ &kNoopCacheItemHelper, kSize3, &item3));
1988
1988
  ASSERT_NE(nullptr, item2);
1989
1989
  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value));
1990
1990
  ASSERT_EQ(kCapacity, value);