@nxtedition/rocksdb 8.1.4 → 8.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223) hide show
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +21 -0
  2. package/deps/rocksdb/rocksdb/Makefile +15 -3
  3. package/deps/rocksdb/rocksdb/TARGETS +6 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +32 -35
  5. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +0 -30
  6. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.h +0 -83
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +13 -14
  8. package/deps/rocksdb/rocksdb/cache/cache_helpers.cc +40 -0
  9. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +14 -20
  10. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +8 -9
  11. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +5 -4
  12. package/deps/rocksdb/rocksdb/cache/cache_test.cc +124 -156
  13. package/deps/rocksdb/rocksdb/cache/charged_cache.cc +10 -26
  14. package/deps/rocksdb/rocksdb/cache/charged_cache.h +11 -16
  15. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +35 -32
  16. package/deps/rocksdb/rocksdb/cache/clock_cache.h +19 -21
  17. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +42 -30
  18. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +9 -8
  19. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +91 -143
  20. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +54 -60
  21. package/deps/rocksdb/rocksdb/cache/lru_cache.h +37 -63
  22. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +120 -106
  23. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +14 -5
  24. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +16 -31
  25. package/deps/rocksdb/rocksdb/cache/typed_cache.h +339 -0
  26. package/deps/rocksdb/rocksdb/db/blob/blob_contents.cc +0 -48
  27. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +18 -15
  28. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +5 -26
  29. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +7 -8
  30. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +6 -3
  31. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +2 -7
  32. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +19 -47
  33. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +13 -5
  34. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +15 -22
  35. package/deps/rocksdb/rocksdb/db/builder.cc +24 -10
  36. package/deps/rocksdb/rocksdb/db/builder.h +2 -1
  37. package/deps/rocksdb/rocksdb/db/c.cc +15 -0
  38. package/deps/rocksdb/rocksdb/db/c_test.c +3 -0
  39. package/deps/rocksdb/rocksdb/db/column_family.cc +11 -6
  40. package/deps/rocksdb/rocksdb/db/column_family.h +20 -6
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +31 -34
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +3 -0
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +21 -3
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +1 -0
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +4 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +4 -2
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +9 -6
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +275 -82
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -18
  51. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +17 -16
  52. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +19 -6
  53. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +5 -5
  54. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +22 -22
  55. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +5 -5
  56. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +81 -52
  57. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -1
  58. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +5 -5
  59. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +8 -2
  60. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +3 -0
  61. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +266 -138
  62. package/deps/rocksdb/rocksdb/db/corruption_test.cc +86 -1
  63. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +98 -9
  64. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +28 -28
  65. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +2 -3
  66. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +1022 -123
  67. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +65 -4
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +32 -21
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +32 -24
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +199 -77
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +1 -1
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +3 -2
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -0
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +8 -4
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +43 -23
  76. package/deps/rocksdb/rocksdb/db/db_iter.cc +8 -2
  77. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +42 -0
  78. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +155 -0
  79. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +12 -12
  80. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +230 -2
  81. package/deps/rocksdb/rocksdb/db/db_test.cc +3 -0
  82. package/deps/rocksdb/rocksdb/db/db_test2.cc +233 -8
  83. package/deps/rocksdb/rocksdb/db/db_test_util.cc +11 -10
  84. package/deps/rocksdb/rocksdb/db/db_test_util.h +39 -24
  85. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +129 -0
  86. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +28 -0
  87. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +21 -0
  88. package/deps/rocksdb/rocksdb/db/dbformat.cc +25 -0
  89. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -0
  90. package/deps/rocksdb/rocksdb/db/experimental.cc +3 -2
  91. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +3 -0
  92. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +92 -13
  93. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +38 -1
  94. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +14 -110
  95. package/deps/rocksdb/rocksdb/db/flush_job.cc +12 -10
  96. package/deps/rocksdb/rocksdb/db/flush_job.h +3 -2
  97. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +29 -29
  98. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +56 -53
  99. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +3 -4
  100. package/deps/rocksdb/rocksdb/db/internal_stats.cc +11 -11
  101. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -2
  102. package/deps/rocksdb/rocksdb/db/log_reader.cc +8 -6
  103. package/deps/rocksdb/rocksdb/db/log_test.cc +35 -2
  104. package/deps/rocksdb/rocksdb/db/memtable.cc +31 -6
  105. package/deps/rocksdb/rocksdb/db/merge_helper.cc +47 -29
  106. package/deps/rocksdb/rocksdb/db/merge_helper.h +14 -6
  107. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +10 -10
  108. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/repair.cc +65 -22
  110. package/deps/rocksdb/rocksdb/db/repair_test.cc +54 -0
  111. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +26 -26
  112. package/deps/rocksdb/rocksdb/db/table_cache.cc +41 -91
  113. package/deps/rocksdb/rocksdb/db/table_cache.h +17 -19
  114. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -9
  115. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -1
  116. package/deps/rocksdb/rocksdb/db/version_builder.cc +102 -52
  117. package/deps/rocksdb/rocksdb/db/version_builder.h +20 -0
  118. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +218 -93
  119. package/deps/rocksdb/rocksdb/db/version_edit.cc +27 -1
  120. package/deps/rocksdb/rocksdb/db/version_edit.h +34 -9
  121. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +13 -6
  122. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +17 -6
  123. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +19 -17
  124. package/deps/rocksdb/rocksdb/db/version_set.cc +160 -28
  125. package/deps/rocksdb/rocksdb/db/version_set.h +34 -4
  126. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -1
  127. package/deps/rocksdb/rocksdb/db/version_set_test.cc +65 -31
  128. package/deps/rocksdb/rocksdb/db/write_batch.cc +4 -1
  129. package/deps/rocksdb/rocksdb/db/write_thread.cc +5 -2
  130. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -0
  131. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -32
  132. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.h +2 -1
  133. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +8 -6
  134. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
  135. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +11 -4
  136. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +16 -15
  137. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +13 -1
  138. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -0
  139. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +286 -217
  140. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +8 -0
  141. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +137 -135
  142. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
  143. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +7 -1
  144. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +21 -0
  145. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +9 -3
  146. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +2 -1
  147. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +8 -6
  148. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +3 -0
  149. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +69 -9
  150. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  151. package/deps/rocksdb/rocksdb/memory/arena.cc +23 -87
  152. package/deps/rocksdb/rocksdb/memory/arena.h +25 -31
  153. package/deps/rocksdb/rocksdb/memory/arena_test.cc +90 -0
  154. package/deps/rocksdb/rocksdb/memory/memory_allocator.h +9 -0
  155. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +26 -26
  156. package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -3
  157. package/deps/rocksdb/rocksdb/port/mmap.cc +98 -0
  158. package/deps/rocksdb/rocksdb/port/mmap.h +70 -0
  159. package/deps/rocksdb/rocksdb/port/port_posix.h +2 -0
  160. package/{prebuilds → deps/rocksdb/rocksdb/prebuilds}/linux-x64/node.napi.node +0 -0
  161. package/deps/rocksdb/rocksdb/src.mk +3 -0
  162. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -2
  163. package/deps/rocksdb/rocksdb/table/block_based/block.h +3 -0
  164. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +25 -67
  165. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +3 -3
  166. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +18 -13
  167. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +159 -225
  168. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +31 -50
  169. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +52 -20
  170. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +3 -3
  171. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +1 -1
  172. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +96 -0
  173. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +132 -0
  174. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +28 -0
  175. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -5
  176. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +1 -4
  177. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +6 -7
  178. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +3 -1
  179. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +6 -1
  180. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +19 -18
  181. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +9 -5
  182. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +3 -1
  183. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +2 -1
  184. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +2 -2
  185. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -3
  186. package/deps/rocksdb/rocksdb/table/format.cc +24 -20
  187. package/deps/rocksdb/rocksdb/table/format.h +6 -3
  188. package/deps/rocksdb/rocksdb/table/get_context.cc +12 -3
  189. package/deps/rocksdb/rocksdb/table/internal_iterator.h +0 -2
  190. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +69 -35
  191. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +2 -2
  192. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
  193. package/deps/rocksdb/rocksdb/table/table_test.cc +7 -6
  194. package/deps/rocksdb/rocksdb/test_util/testutil.h +10 -0
  195. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +66 -1
  196. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +9 -2
  197. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +5 -0
  198. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +2 -2
  199. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +1 -1
  200. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +20 -12
  201. package/deps/rocksdb/rocksdb/util/bloom_test.cc +1 -1
  202. package/deps/rocksdb/rocksdb/util/compression.cc +2 -2
  203. package/deps/rocksdb/rocksdb/util/compression.h +11 -2
  204. package/deps/rocksdb/rocksdb/util/status.cc +7 -0
  205. package/deps/rocksdb/rocksdb/util/xxhash.h +1901 -887
  206. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +250 -74
  207. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +199 -4
  208. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +35 -57
  209. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +4 -5
  210. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -0
  211. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +39 -0
  212. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +9 -0
  213. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +11 -6
  214. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +6 -5
  215. package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +0 -1
  216. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +10 -11
  217. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +31 -31
  218. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +4 -0
  219. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +111 -0
  220. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +1 -0
  221. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +12 -3
  222. package/package.json +1 -1
  223. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +0 -182
@@ -746,6 +746,64 @@ class TestFlushListener : public EventListener {
746
746
  };
747
747
  #endif // !ROCKSDB_LITE
748
748
 
749
+ // RocksDB lite does not support GetLiveFiles()
750
+ #ifndef ROCKSDB_LITE
751
+ TEST_F(DBFlushTest, FixFlushReasonRaceFromConcurrentFlushes) {
752
+ Options options = CurrentOptions();
753
+ options.atomic_flush = true;
754
+ options.disable_auto_compactions = true;
755
+ CreateAndReopenWithCF({"cf1"}, options);
756
+
757
+ for (int idx = 0; idx < 1; ++idx) {
758
+ ASSERT_OK(Put(0, Key(idx), std::string(1, 'v')));
759
+ ASSERT_OK(Put(1, Key(idx), std::string(1, 'v')));
760
+ }
761
+
762
+ // To coerce a manual flush happening in the middle of GetLiveFiles's flush,
763
+ // we need to pause background flush thread and enable it later.
764
+ std::shared_ptr<test::SleepingBackgroundTask> sleeping_task =
765
+ std::make_shared<test::SleepingBackgroundTask>();
766
+ env_->SetBackgroundThreads(1, Env::HIGH);
767
+ env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
768
+ sleeping_task.get(), Env::Priority::HIGH);
769
+ sleeping_task->WaitUntilSleeping();
770
+
771
+ // Coerce a manual flush happening in the middle of GetLiveFiles's flush
772
+ bool get_live_files_paused_at_sync_point = false;
773
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
774
+ "DBImpl::AtomicFlushMemTables:AfterScheduleFlush", [&](void* /* arg */) {
775
+ if (get_live_files_paused_at_sync_point) {
776
+ // To prevent non-GetLiveFiles() flush from pausing at this sync point
777
+ return;
778
+ }
779
+ get_live_files_paused_at_sync_point = true;
780
+
781
+ FlushOptions fo;
782
+ fo.wait = false;
783
+ fo.allow_write_stall = true;
784
+ ASSERT_OK(dbfull()->Flush(fo));
785
+
786
+ // Resume background flush thread so GetLiveFiles() can finish
787
+ sleeping_task->WakeUp();
788
+ sleeping_task->WaitUntilDone();
789
+ });
790
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
791
+
792
+ std::vector<std::string> files;
793
+ uint64_t manifest_file_size;
794
+ // Before the fix, a race condition on default cf's flush reason due to
795
+ // concurrent GetLiveFiles's flush and manual flush will fail
796
+ // an internal assertion.
797
+ // After the fix, such race condition is fixed and there is no assertion
798
+ // failure.
799
+ ASSERT_OK(db_->GetLiveFiles(files, &manifest_file_size, /*flush*/ true));
800
+ ASSERT_TRUE(get_live_files_paused_at_sync_point);
801
+
802
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
803
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
804
+ }
805
+ #endif // !ROCKSDB_LITE
806
+
749
807
  TEST_F(DBFlushTest, MemPurgeBasic) {
750
808
  Options options = CurrentOptions();
751
809
 
@@ -2440,7 +2498,9 @@ TEST_P(DBAtomicFlushTest, ManualFlushUnder2PC) {
2440
2498
  options.atomic_flush = GetParam();
2441
2499
  // 64MB so that memtable flush won't be triggered by the small writes.
2442
2500
  options.write_buffer_size = (static_cast<size_t>(64) << 20);
2443
-
2501
+ auto flush_listener = std::make_shared<FlushCounterListener>();
2502
+ flush_listener->expected_flush_reason = FlushReason::kManualFlush;
2503
+ options.listeners.push_back(flush_listener);
2444
2504
  // Destroy the DB to recreate as a TransactionDB.
2445
2505
  Close();
2446
2506
  Destroy(options, true);
@@ -2507,7 +2567,6 @@ TEST_P(DBAtomicFlushTest, ManualFlushUnder2PC) {
2507
2567
  auto cfh = static_cast<ColumnFamilyHandleImpl*>(handles_[i]);
2508
2568
  ASSERT_EQ(0, cfh->cfd()->imm()->NumNotFlushed());
2509
2569
  ASSERT_TRUE(cfh->cfd()->mem()->IsEmpty());
2510
- ASSERT_EQ(cfh->cfd()->GetFlushReason(), FlushReason::kManualFlush);
2511
2570
  }
2512
2571
 
2513
2572
  // The recovered min log number with prepared data should be non-zero.
@@ -2520,13 +2579,15 @@ TEST_P(DBAtomicFlushTest, ManualFlushUnder2PC) {
2520
2579
  ASSERT_TRUE(db_impl->allow_2pc());
2521
2580
  ASSERT_NE(db_impl->MinLogNumberToKeep(), 0);
2522
2581
  }
2523
- #endif // ROCKSDB_LITE
2524
2582
 
2525
2583
  TEST_P(DBAtomicFlushTest, ManualAtomicFlush) {
2526
2584
  Options options = CurrentOptions();
2527
2585
  options.create_if_missing = true;
2528
2586
  options.atomic_flush = GetParam();
2529
2587
  options.write_buffer_size = (static_cast<size_t>(64) << 20);
2588
+ auto flush_listener = std::make_shared<FlushCounterListener>();
2589
+ flush_listener->expected_flush_reason = FlushReason::kManualFlush;
2590
+ options.listeners.push_back(flush_listener);
2530
2591
 
2531
2592
  CreateAndReopenWithCF({"pikachu", "eevee"}, options);
2532
2593
  size_t num_cfs = handles_.size();
@@ -2551,11 +2612,11 @@ TEST_P(DBAtomicFlushTest, ManualAtomicFlush) {
2551
2612
 
2552
2613
  for (size_t i = 0; i != num_cfs; ++i) {
2553
2614
  auto cfh = static_cast<ColumnFamilyHandleImpl*>(handles_[i]);
2554
- ASSERT_EQ(cfh->cfd()->GetFlushReason(), FlushReason::kManualFlush);
2555
2615
  ASSERT_EQ(0, cfh->cfd()->imm()->NumNotFlushed());
2556
2616
  ASSERT_TRUE(cfh->cfd()->mem()->IsEmpty());
2557
2617
  }
2558
2618
  }
2619
+ #endif // ROCKSDB_LITE
2559
2620
 
2560
2621
  TEST_P(DBAtomicFlushTest, PrecomputeMinLogNumberToKeepNon2PC) {
2561
2622
  Options options = CurrentOptions();
@@ -604,7 +604,7 @@ Status DBImpl::CloseHelper() {
604
604
 
605
605
  while (!flush_queue_.empty()) {
606
606
  const FlushRequest& flush_req = PopFirstFromFlushQueue();
607
- for (const auto& iter : flush_req) {
607
+ for (const auto& iter : flush_req.cfd_to_max_mem_id_to_persist) {
608
608
  iter.first->UnrefAndTryDelete();
609
609
  }
610
610
  }
@@ -1570,21 +1570,31 @@ Status DBImpl::ApplyWALToManifest(VersionEdit* synced_wals) {
1570
1570
  }
1571
1571
 
1572
1572
  Status DBImpl::LockWAL() {
1573
- log_write_mutex_.Lock();
1574
- auto cur_log_writer = logs_.back().writer;
1575
- IOStatus status = cur_log_writer->WriteBuffer();
1576
- if (!status.ok()) {
1577
- ROCKS_LOG_ERROR(immutable_db_options_.info_log, "WAL flush error %s",
1578
- status.ToString().c_str());
1579
- // In case there is a fs error we should set it globally to prevent the
1580
- // future writes
1581
- WriteStatusCheck(status);
1573
+ {
1574
+ InstrumentedMutexLock lock(&mutex_);
1575
+ WriteThread::Writer w;
1576
+ write_thread_.EnterUnbatched(&w, &mutex_);
1577
+ WriteThread::Writer nonmem_w;
1578
+ if (two_write_queues_) {
1579
+ nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_);
1580
+ }
1581
+
1582
+ lock_wal_write_token_ = write_controller_.GetStopToken();
1583
+
1584
+ if (two_write_queues_) {
1585
+ nonmem_write_thread_.ExitUnbatched(&nonmem_w);
1586
+ }
1587
+ write_thread_.ExitUnbatched(&w);
1582
1588
  }
1583
- return static_cast<Status>(status);
1589
+ return FlushWAL(/*sync=*/false);
1584
1590
  }
1585
1591
 
1586
1592
  Status DBImpl::UnlockWAL() {
1587
- log_write_mutex_.Unlock();
1593
+ {
1594
+ InstrumentedMutexLock lock(&mutex_);
1595
+ lock_wal_write_token_.reset();
1596
+ }
1597
+ bg_cv_.SignalAll();
1588
1598
  return Status::OK();
1589
1599
  }
1590
1600
 
@@ -5189,8 +5199,9 @@ Status DBImpl::IngestExternalFiles(
5189
5199
  for (const auto& arg : args) {
5190
5200
  auto* cfd = static_cast<ColumnFamilyHandleImpl*>(arg.column_family)->cfd();
5191
5201
  ingestion_jobs.emplace_back(versions_.get(), cfd, immutable_db_options_,
5192
- file_options_, &snapshots_, arg.options,
5193
- &directories_, &event_logger_, io_tracer_);
5202
+ mutable_db_options_, file_options_, &snapshots_,
5203
+ arg.options, &directories_, &event_logger_,
5204
+ io_tracer_);
5194
5205
  }
5195
5206
 
5196
5207
  // TODO(yanqin) maybe make jobs run in parallel
@@ -5318,10 +5329,12 @@ Status DBImpl::IngestExternalFiles(
5318
5329
  // Run ingestion jobs.
5319
5330
  if (status.ok()) {
5320
5331
  for (size_t i = 0; i != num_cfs; ++i) {
5332
+ mutex_.AssertHeld();
5321
5333
  status = ingestion_jobs[i].Run();
5322
5334
  if (!status.ok()) {
5323
5335
  break;
5324
5336
  }
5337
+ ingestion_jobs[i].RegisterRange();
5325
5338
  }
5326
5339
  }
5327
5340
  if (status.ok()) {
@@ -5377,6 +5390,10 @@ Status DBImpl::IngestExternalFiles(
5377
5390
  }
5378
5391
  }
5379
5392
 
5393
+ for (auto& job : ingestion_jobs) {
5394
+ job.UnregisterRange();
5395
+ }
5396
+
5380
5397
  if (status.ok()) {
5381
5398
  for (size_t i = 0; i != num_cfs; ++i) {
5382
5399
  auto* cfd =
@@ -5522,6 +5539,7 @@ Status DBImpl::CreateColumnFamilyWithImport(
5522
5539
 
5523
5540
  num_running_ingest_file_++;
5524
5541
  assert(!cfd->IsDropped());
5542
+ mutex_.AssertHeld();
5525
5543
  status = import_job.Run();
5526
5544
 
5527
5545
  // Install job edit [Mutex will be unlocked here]
@@ -5747,13 +5765,6 @@ void DBImpl::NotifyOnExternalFileIngested(
5747
5765
  }
5748
5766
  }
5749
5767
 
5750
- void DBImpl::WaitForIngestFile() {
5751
- mutex_.AssertHeld();
5752
- while (num_running_ingest_file_ > 0) {
5753
- bg_cv_.Wait();
5754
- }
5755
- }
5756
-
5757
5768
  Status DBImpl::StartTrace(const TraceOptions& trace_options,
5758
5769
  std::unique_ptr<TraceWriter>&& trace_writer) {
5759
5770
  InstrumentedMutexLock lock(&trace_mutex_);
@@ -16,6 +16,7 @@
16
16
  #include <map>
17
17
  #include <set>
18
18
  #include <string>
19
+ #include <unordered_map>
19
20
  #include <utility>
20
21
  #include <vector>
21
22
 
@@ -1161,7 +1162,7 @@ class DBImpl : public DB {
1161
1162
  int TEST_BGCompactionsAllowed() const;
1162
1163
  int TEST_BGFlushesAllowed() const;
1163
1164
  size_t TEST_GetWalPreallocateBlockSize(uint64_t write_buffer_size) const;
1164
- void TEST_WaitForPeridicTaskRun(std::function<void()> callback) const;
1165
+ void TEST_WaitForPeriodicTaskRun(std::function<void()> callback) const;
1165
1166
  SeqnoToTimeMapping TEST_GetSeqnoToTimeMapping() const;
1166
1167
  size_t TEST_EstimateInMemoryStatsHistorySize() const;
1167
1168
 
@@ -1383,7 +1384,7 @@ class DBImpl : public DB {
1383
1384
 
1384
1385
  void NotifyOnFlushBegin(ColumnFamilyData* cfd, FileMetaData* file_meta,
1385
1386
  const MutableCFOptions& mutable_cf_options,
1386
- int job_id);
1387
+ int job_id, FlushReason flush_reason);
1387
1388
 
1388
1389
  void NotifyOnFlushCompleted(
1389
1390
  ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options,
@@ -1675,12 +1676,17 @@ class DBImpl : public DB {
1675
1676
  // Argument required by background flush thread.
1676
1677
  struct BGFlushArg {
1677
1678
  BGFlushArg()
1678
- : cfd_(nullptr), max_memtable_id_(0), superversion_context_(nullptr) {}
1679
+ : cfd_(nullptr),
1680
+ max_memtable_id_(0),
1681
+ superversion_context_(nullptr),
1682
+ flush_reason_(FlushReason::kOthers) {}
1679
1683
  BGFlushArg(ColumnFamilyData* cfd, uint64_t max_memtable_id,
1680
- SuperVersionContext* superversion_context)
1684
+ SuperVersionContext* superversion_context,
1685
+ FlushReason flush_reason)
1681
1686
  : cfd_(cfd),
1682
1687
  max_memtable_id_(max_memtable_id),
1683
- superversion_context_(superversion_context) {}
1688
+ superversion_context_(superversion_context),
1689
+ flush_reason_(flush_reason) {}
1684
1690
 
1685
1691
  // Column family to flush.
1686
1692
  ColumnFamilyData* cfd_;
@@ -1691,6 +1697,7 @@ class DBImpl : public DB {
1691
1697
  // installs a new superversion for the column family. This operation
1692
1698
  // requires a SuperVersionContext object (currently embedded in JobContext).
1693
1699
  SuperVersionContext* superversion_context_;
1700
+ FlushReason flush_reason_;
1694
1701
  };
1695
1702
 
1696
1703
  // Argument passed to flush thread.
@@ -1819,7 +1826,7 @@ class DBImpl : public DB {
1819
1826
  // installs a new super version for the column family.
1820
1827
  Status FlushMemTableToOutputFile(
1821
1828
  ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options,
1822
- bool* madeProgress, JobContext* job_context,
1829
+ bool* madeProgress, JobContext* job_context, FlushReason flush_reason,
1823
1830
  SuperVersionContext* superversion_context,
1824
1831
  std::vector<SequenceNumber>& snapshot_seqs,
1825
1832
  SequenceNumber earliest_write_conflict_snapshot,
@@ -1865,7 +1872,8 @@ class DBImpl : public DB {
1865
1872
 
1866
1873
  // num_bytes: for slowdown case, delay time is calculated based on
1867
1874
  // `num_bytes` going through.
1868
- Status DelayWrite(uint64_t num_bytes, const WriteOptions& write_options);
1875
+ Status DelayWrite(uint64_t num_bytes, WriteThread& write_thread,
1876
+ const WriteOptions& write_options);
1869
1877
 
1870
1878
  // Begin stalling of writes when memory usage increases beyond a certain
1871
1879
  // threshold.
@@ -2023,32 +2031,28 @@ class DBImpl : public DB {
2023
2031
  const int output_level, int output_path_id,
2024
2032
  JobContext* job_context, LogBuffer* log_buffer,
2025
2033
  CompactionJobInfo* compaction_job_info);
2026
-
2027
- // Wait for current IngestExternalFile() calls to finish.
2028
- // REQUIRES: mutex_ held
2029
- void WaitForIngestFile();
2030
- #else
2031
- // IngestExternalFile is not supported in ROCKSDB_LITE so this function
2032
- // will be no-op
2033
- void WaitForIngestFile() {}
2034
2034
  #endif // ROCKSDB_LITE
2035
2035
 
2036
2036
  ColumnFamilyData* GetColumnFamilyDataByName(const std::string& cf_name);
2037
2037
 
2038
2038
  void MaybeScheduleFlushOrCompaction();
2039
2039
 
2040
- // A flush request specifies the column families to flush as well as the
2041
- // largest memtable id to persist for each column family. Once all the
2042
- // memtables whose IDs are smaller than or equal to this per-column-family
2043
- // specified value, this flush request is considered to have completed its
2044
- // work of flushing this column family. After completing the work for all
2045
- // column families in this request, this flush is considered complete.
2046
- using FlushRequest = std::vector<std::pair<ColumnFamilyData*, uint64_t>>;
2040
+ struct FlushRequest {
2041
+ FlushReason flush_reason;
2042
+ // A map from column family to flush to largest memtable id to persist for
2043
+ // each column family. Once all the memtables whose IDs are smaller than or
2044
+ // equal to this per-column-family specified value, this flush request is
2045
+ // considered to have completed its work of flushing this column family.
2046
+ // After completing the work for all column families in this request, this
2047
+ // flush is considered complete.
2048
+ std::unordered_map<ColumnFamilyData*, uint64_t>
2049
+ cfd_to_max_mem_id_to_persist;
2050
+ };
2047
2051
 
2048
2052
  void GenerateFlushRequest(const autovector<ColumnFamilyData*>& cfds,
2049
- FlushRequest* req);
2053
+ FlushReason flush_reason, FlushRequest* req);
2050
2054
 
2051
- void SchedulePendingFlush(const FlushRequest& req, FlushReason flush_reason);
2055
+ void SchedulePendingFlush(const FlushRequest& req);
2052
2056
 
2053
2057
  void SchedulePendingCompaction(ColumnFamilyData* cfd);
2054
2058
  void SchedulePendingPurge(std::string fname, std::string dir_to_sync,
@@ -2680,6 +2684,10 @@ class DBImpl : public DB {
2680
2684
  // seqno_time_mapping_ stores the sequence number to time mapping, it's not
2681
2685
  // thread safe, both read and write need db mutex hold.
2682
2686
  SeqnoToTimeMapping seqno_time_mapping_;
2687
+
2688
+ // stop write token that is acquired when LockWAL() is called. Destructed
2689
+ // when UnlockWAL() is called.
2690
+ std::unique_ptr<WriteControllerToken> lock_wal_write_token_;
2683
2691
  };
2684
2692
 
2685
2693
  class GetWithTimestampReadCallback : public ReadCallback {