@nxtedition/rocksdb 7.1.14 → 7.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223)
  1. package/binding.cc +1 -0
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +72 -18
  3. package/deps/rocksdb/rocksdb/Makefile +91 -11
  4. package/deps/rocksdb/rocksdb/TARGETS +8 -4
  5. package/deps/rocksdb/rocksdb/cache/cache.cc +5 -0
  6. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +13 -8
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +2 -0
  8. package/deps/rocksdb/rocksdb/cache/cache_test.cc +116 -57
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +958 -459
  10. package/deps/rocksdb/rocksdb/cache/clock_cache.h +407 -622
  11. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +104 -40
  12. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +23 -8
  13. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +350 -184
  14. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +12 -2
  15. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +2 -0
  16. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +130 -43
  17. package/deps/rocksdb/rocksdb/cache/lru_cache.h +24 -2
  18. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +423 -98
  19. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +19 -2
  20. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +10 -7
  21. package/deps/rocksdb/rocksdb/crash_test.mk +2 -2
  22. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +46 -26
  23. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +9 -3
  24. package/deps/rocksdb/rocksdb/db/blob/blob_contents.cc +90 -0
  25. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +56 -0
  26. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -10
  27. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +64 -59
  28. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +11 -8
  29. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +92 -62
  30. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +159 -136
  31. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +13 -13
  32. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +129 -57
  33. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +81 -3
  34. package/deps/rocksdb/rocksdb/db/c.cc +29 -0
  35. package/deps/rocksdb/rocksdb/db/column_family.cc +10 -1
  36. package/deps/rocksdb/rocksdb/db/column_family_test.cc +21 -0
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +42 -36
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +344 -102
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +163 -28
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +52 -17
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +35 -30
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -3
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +167 -11
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +8 -8
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +10 -13
  46. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +0 -117
  47. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +6 -49
  48. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +29 -4
  49. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +18 -11
  50. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +4 -10
  51. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +1 -1
  52. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +12 -0
  53. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +144 -93
  54. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +28 -32
  55. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +1 -1
  56. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -9
  57. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +2 -33
  58. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -5
  59. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +11 -0
  60. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -2
  61. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -0
  62. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +2 -1
  63. package/deps/rocksdb/rocksdb/db/db_iter.cc +76 -138
  64. package/deps/rocksdb/rocksdb/db/db_iter.h +26 -23
  65. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +1 -1
  66. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +931 -0
  67. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +2 -2
  68. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -0
  69. package/deps/rocksdb/rocksdb/db/db_test2.cc +44 -22
  70. package/deps/rocksdb/rocksdb/db/db_test_util.cc +6 -14
  71. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +155 -0
  72. package/deps/rocksdb/rocksdb/db/db_write_test.cc +45 -0
  73. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -1
  74. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +8 -0
  75. package/deps/rocksdb/rocksdb/db/experimental.cc +5 -1
  76. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +24 -12
  77. package/deps/rocksdb/rocksdb/db/internal_stats.cc +7 -1
  78. package/deps/rocksdb/rocksdb/db/internal_stats.h +3 -0
  79. package/deps/rocksdb/rocksdb/db/memtable.cc +79 -18
  80. package/deps/rocksdb/rocksdb/db/memtable.h +5 -0
  81. package/deps/rocksdb/rocksdb/db/memtable_list.cc +26 -4
  82. package/deps/rocksdb/rocksdb/db/memtable_list.h +2 -1
  83. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +113 -0
  84. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.h +110 -0
  85. package/deps/rocksdb/rocksdb/db/{periodic_work_scheduler_test.cc → periodic_task_scheduler_test.cc} +33 -39
  86. package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +12 -20
  87. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +6 -5
  88. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +12 -8
  89. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +20 -5
  90. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +14 -0
  91. package/deps/rocksdb/rocksdb/db/repair.cc +17 -8
  92. package/deps/rocksdb/rocksdb/db/repair_test.cc +2 -1
  93. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +49 -66
  94. package/deps/rocksdb/rocksdb/db/table_cache.cc +92 -63
  95. package/deps/rocksdb/rocksdb/db/table_cache.h +16 -9
  96. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
  97. package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +2 -2
  98. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -3
  99. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
  100. package/deps/rocksdb/rocksdb/db/version_builder.cc +1 -1
  101. package/deps/rocksdb/rocksdb/db/version_edit.h +1 -2
  102. package/deps/rocksdb/rocksdb/db/version_set.cc +379 -145
  103. package/deps/rocksdb/rocksdb/db/version_set.h +26 -24
  104. package/deps/rocksdb/rocksdb/db/version_set_test.cc +9 -9
  105. package/deps/rocksdb/rocksdb/db/version_util.h +3 -2
  106. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +10 -2
  107. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +2 -0
  108. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +5 -8
  109. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +5 -8
  110. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +2 -0
  111. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +71 -0
  112. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +14 -0
  113. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +23 -0
  114. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +26 -1
  115. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +105 -34
  116. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +16 -8
  117. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +6 -0
  118. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +4 -8
  119. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +4 -8
  120. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +282 -25
  121. package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
  122. package/deps/rocksdb/rocksdb/env/io_posix.cc +3 -1
  123. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +367 -177
  124. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +144 -56
  125. package/deps/rocksdb/rocksdb/file/filename.cc +3 -3
  126. package/deps/rocksdb/rocksdb/file/filename.h +4 -2
  127. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +415 -0
  128. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +2 -0
  129. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +36 -45
  130. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +21 -3
  131. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +11 -11
  132. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +15 -1
  133. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +163 -68
  134. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +26 -12
  135. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +23 -5
  136. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +21 -17
  137. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +17 -0
  138. package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +3 -3
  139. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +17 -6
  140. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
  141. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +20 -0
  142. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +3 -3
  143. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/option_change_migration.h +4 -0
  144. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  145. package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +3 -0
  146. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -1
  147. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +2 -1
  148. package/deps/rocksdb/rocksdb/logging/env_logger.h +2 -2
  149. package/deps/rocksdb/rocksdb/monitoring/histogram.cc +4 -2
  150. package/deps/rocksdb/rocksdb/monitoring/histogram.h +2 -0
  151. package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +15 -1
  152. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +17 -0
  153. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +14 -3
  154. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +3 -0
  155. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +50 -0
  156. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +1 -0
  157. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +31 -32
  158. package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -1
  159. package/deps/rocksdb/rocksdb/options/options.cc +2 -2
  160. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +2 -1
  161. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +1 -0
  162. package/deps/rocksdb/rocksdb/src.mk +4 -2
  163. package/deps/rocksdb/rocksdb/table/block_based/block.h +9 -8
  164. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +110 -99
  165. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +12 -10
  166. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +11 -2
  167. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +138 -83
  168. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +25 -24
  169. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +31 -30
  170. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +16 -13
  171. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +4 -4
  172. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -3
  173. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +3 -3
  174. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +17 -19
  175. package/deps/rocksdb/rocksdb/table/block_fetcher.h +1 -1
  176. package/deps/rocksdb/rocksdb/table/format.cc +26 -29
  177. package/deps/rocksdb/rocksdb/table/format.h +44 -26
  178. package/deps/rocksdb/rocksdb/table/get_context.cc +17 -12
  179. package/deps/rocksdb/rocksdb/table/internal_iterator.h +7 -0
  180. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +4 -0
  181. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +950 -104
  182. package/deps/rocksdb/rocksdb/table/merging_iterator.h +28 -1
  183. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +3 -2
  184. package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -1
  185. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +10 -9
  186. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +22 -20
  187. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +1 -1
  188. package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +1 -1
  189. package/deps/rocksdb/rocksdb/table/table_builder.h +9 -21
  190. package/deps/rocksdb/rocksdb/table/table_test.cc +12 -12
  191. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +4 -4
  192. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +1 -0
  193. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +116 -34
  194. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +6 -1
  195. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +1 -1
  196. package/deps/rocksdb/rocksdb/util/autovector.h +12 -0
  197. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +3 -2
  198. package/deps/rocksdb/rocksdb/util/stderr_logger.cc +30 -0
  199. package/deps/rocksdb/rocksdb/util/stderr_logger.h +5 -18
  200. package/deps/rocksdb/rocksdb/util/timer.h +2 -3
  201. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +9 -2
  202. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +1 -1
  203. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +1 -1
  204. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +34 -53
  205. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +9 -14
  206. package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -4
  207. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +4 -0
  208. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +1 -1
  209. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +4 -3
  210. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +3 -1
  211. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +26 -8
  212. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +114 -16
  213. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -1
  214. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +59 -0
  215. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +3 -0
  216. package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +39 -0
  217. package/deps/rocksdb/rocksdb.gyp +0 -1
  218. package/index.js +6 -10
  219. package/package.json +1 -1
  220. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  221. package/prebuilds/linux-x64/node.napi.node +0 -0
  222. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +0 -168
  223. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +0 -90
@@ -328,11 +328,9 @@ TEST_F(DBTestCompactionFilter, CompactionFilter) {
328
328
  Arena arena;
329
329
  {
330
330
  InternalKeyComparator icmp(options.comparator);
331
- ReadRangeDelAggregator range_del_agg(&icmp,
332
- kMaxSequenceNumber /* upper_bound */);
333
331
  ReadOptions read_options;
334
332
  ScopedArenaIterator iter(dbfull()->NewInternalIterator(
335
- read_options, &arena, &range_del_agg, kMaxSequenceNumber, handles_[1]));
333
+ read_options, &arena, kMaxSequenceNumber, handles_[1]));
336
334
  iter->SeekToFirst();
337
335
  ASSERT_OK(iter->status());
338
336
  while (iter->Valid()) {
@@ -422,11 +420,9 @@ TEST_F(DBTestCompactionFilter, CompactionFilter) {
422
420
  count = 0;
423
421
  {
424
422
  InternalKeyComparator icmp(options.comparator);
425
- ReadRangeDelAggregator range_del_agg(&icmp,
426
- kMaxSequenceNumber /* upper_bound */);
427
423
  ReadOptions read_options;
428
424
  ScopedArenaIterator iter(dbfull()->NewInternalIterator(
429
- read_options, &arena, &range_del_agg, kMaxSequenceNumber, handles_[1]));
425
+ read_options, &arena, kMaxSequenceNumber, handles_[1]));
430
426
  iter->SeekToFirst();
431
427
  ASSERT_OK(iter->status());
432
428
  while (iter->Valid()) {
@@ -701,11 +697,9 @@ TEST_F(DBTestCompactionFilter, CompactionFilterContextManual) {
701
697
  int total = 0;
702
698
  Arena arena;
703
699
  InternalKeyComparator icmp(options.comparator);
704
- ReadRangeDelAggregator range_del_agg(&icmp,
705
- kMaxSequenceNumber /* snapshots */);
706
700
  ReadOptions read_options;
707
- ScopedArenaIterator iter(dbfull()->NewInternalIterator(
708
- read_options, &arena, &range_del_agg, kMaxSequenceNumber));
701
+ ScopedArenaIterator iter(dbfull()->NewInternalIterator(read_options, &arena,
702
+ kMaxSequenceNumber));
709
703
  iter->SeekToFirst();
710
704
  ASSERT_OK(iter->status());
711
705
  while (iter->Valid()) {
@@ -242,7 +242,7 @@ Status CompactedDBImpl::Open(const Options& options,
242
242
  std::unique_ptr<CompactedDBImpl> db(new CompactedDBImpl(db_options, dbname));
243
243
  Status s = db->Init(options);
244
244
  if (s.ok()) {
245
- s = db->StartPeriodicWorkScheduler();
245
+ s = db->StartPeriodicTaskScheduler();
246
246
  }
247
247
  if (s.ok()) {
248
248
  ROCKS_LOG_INFO(db->immutable_db_options_.info_log,
@@ -11,6 +11,7 @@
11
11
 
12
12
  namespace ROCKSDB_NAMESPACE {
13
13
 
14
+ // TODO: Share common structure with DBImplSecondary and DBImplReadOnly
14
15
  class CompactedDBImpl : public DBImpl {
15
16
  public:
16
17
  CompactedDBImpl(const DBOptions& options, const std::string& dbname);
@@ -127,6 +128,17 @@ class CompactedDBImpl : public DBImpl {
127
128
  return Status::NotSupported("Not supported in compacted db mode.");
128
129
  }
129
130
 
131
+ // FIXME: some missing overrides for more "write" functions
132
+ // Share with DBImplReadOnly?
133
+
134
+ protected:
135
+ #ifndef ROCKSDB_LITE
136
+ Status FlushForGetLiveFiles() override {
137
+ // No-op for read-only DB
138
+ return Status::OK();
139
+ }
140
+ #endif // !ROCKSDB_LITE
141
+
130
142
  private:
131
143
  friend class DB;
132
144
  inline size_t FindFile(const Slice& key);
@@ -45,7 +45,7 @@
45
45
  #include "db/memtable_list.h"
46
46
  #include "db/merge_context.h"
47
47
  #include "db/merge_helper.h"
48
- #include "db/periodic_work_scheduler.h"
48
+ #include "db/periodic_task_scheduler.h"
49
49
  #include "db/range_tombstone_fragmenter.h"
50
50
  #include "db/table_cache.h"
51
51
  #include "db/table_properties_collector.h"
@@ -165,8 +165,13 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
165
165
  fs_(immutable_db_options_.fs, io_tracer_),
166
166
  mutable_db_options_(initial_db_options_),
167
167
  stats_(immutable_db_options_.stats),
168
+ #ifdef COERCE_CONTEXT_SWITCH
169
+ mutex_(stats_, immutable_db_options_.clock, DB_MUTEX_WAIT_MICROS, &bg_cv_,
170
+ immutable_db_options_.use_adaptive_mutex),
171
+ #else // COERCE_CONTEXT_SWITCH
168
172
  mutex_(stats_, immutable_db_options_.clock, DB_MUTEX_WAIT_MICROS,
169
173
  immutable_db_options_.use_adaptive_mutex),
174
+ #endif // COERCE_CONTEXT_SWITCH
170
175
  default_cf_handle_(nullptr),
171
176
  error_handler_(this, immutable_db_options_, &mutex_),
172
177
  event_logger_(immutable_db_options_.info_log.get()),
@@ -217,7 +222,7 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
217
222
  refitting_level_(false),
218
223
  opened_successfully_(false),
219
224
  #ifndef ROCKSDB_LITE
220
- periodic_work_scheduler_(nullptr),
225
+ periodic_task_scheduler_(),
221
226
  #endif // ROCKSDB_LITE
222
227
  two_write_queues_(options.two_write_queues),
223
228
  manual_wal_flush_(options.manual_wal_flush),
@@ -260,6 +265,18 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
260
265
  SetDbSessionId();
261
266
  assert(!db_session_id_.empty());
262
267
 
268
+ #ifndef ROCKSDB_LITE
269
+ periodic_task_functions_.emplace(PeriodicTaskType::kDumpStats,
270
+ [this]() { this->DumpStats(); });
271
+ periodic_task_functions_.emplace(PeriodicTaskType::kPersistStats,
272
+ [this]() { this->PersistStats(); });
273
+ periodic_task_functions_.emplace(PeriodicTaskType::kFlushInfoLog,
274
+ [this]() { this->FlushInfoLog(); });
275
+ periodic_task_functions_.emplace(
276
+ PeriodicTaskType::kRecordSeqnoTime,
277
+ [this]() { this->RecordSeqnoToTimeMapping(); });
278
+ #endif // ROCKSDB_LITE
279
+
263
280
  versions_.reset(new VersionSet(dbname_, &immutable_db_options_, file_options_,
264
281
  table_cache_.get(), write_buffer_manager_,
265
282
  &write_controller_, &block_cache_tracer_,
@@ -480,9 +497,15 @@ void DBImpl::CancelAllBackgroundWork(bool wait) {
480
497
  "Shutdown: canceling all background work");
481
498
 
482
499
  #ifndef ROCKSDB_LITE
483
- if (periodic_work_scheduler_ != nullptr) {
484
- periodic_work_scheduler_->Unregister(this);
485
- periodic_work_scheduler_->UnregisterRecordSeqnoTimeWorker(this);
500
+ for (uint8_t task_type = 0;
501
+ task_type < static_cast<uint8_t>(PeriodicTaskType::kMax); task_type++) {
502
+ Status s = periodic_task_scheduler_.Unregister(
503
+ static_cast<PeriodicTaskType>(task_type));
504
+ if (!s.ok()) {
505
+ ROCKS_LOG_WARN(immutable_db_options_.info_log,
506
+ "Failed to unregister periodic task %d, status: %s",
507
+ task_type, s.ToString().c_str());
508
+ }
486
509
  }
487
510
  #endif // !ROCKSDB_LITE
488
511
 
@@ -767,30 +790,50 @@ void DBImpl::PrintStatistics() {
767
790
  }
768
791
  }
769
792
 
770
- Status DBImpl::StartPeriodicWorkScheduler() {
793
+ Status DBImpl::StartPeriodicTaskScheduler() {
771
794
  #ifndef ROCKSDB_LITE
772
795
 
773
796
  #ifndef NDEBUG
774
797
  // It only used by test to disable scheduler
775
798
  bool disable_scheduler = false;
776
799
  TEST_SYNC_POINT_CALLBACK(
777
- "DBImpl::StartPeriodicWorkScheduler:DisableScheduler",
800
+ "DBImpl::StartPeriodicTaskScheduler:DisableScheduler",
778
801
  &disable_scheduler);
779
802
  if (disable_scheduler) {
780
803
  return Status::OK();
781
804
  }
782
- #endif // !NDEBUG
783
805
 
784
806
  {
785
807
  InstrumentedMutexLock l(&mutex_);
786
- periodic_work_scheduler_ = PeriodicWorkScheduler::Default();
787
- TEST_SYNC_POINT_CALLBACK("DBImpl::StartPeriodicWorkScheduler:Init",
788
- &periodic_work_scheduler_);
808
+ TEST_SYNC_POINT_CALLBACK("DBImpl::StartPeriodicTaskScheduler:Init",
809
+ &periodic_task_scheduler_);
789
810
  }
790
811
 
791
- return periodic_work_scheduler_->Register(
792
- this, mutable_db_options_.stats_dump_period_sec,
793
- mutable_db_options_.stats_persist_period_sec);
812
+ #endif // !NDEBUG
813
+ if (mutable_db_options_.stats_dump_period_sec > 0) {
814
+ Status s = periodic_task_scheduler_.Register(
815
+ PeriodicTaskType::kDumpStats,
816
+ periodic_task_functions_.at(PeriodicTaskType::kDumpStats),
817
+ mutable_db_options_.stats_dump_period_sec);
818
+ if (!s.ok()) {
819
+ return s;
820
+ }
821
+ }
822
+ if (mutable_db_options_.stats_persist_period_sec > 0) {
823
+ Status s = periodic_task_scheduler_.Register(
824
+ PeriodicTaskType::kPersistStats,
825
+ periodic_task_functions_.at(PeriodicTaskType::kPersistStats),
826
+ mutable_db_options_.stats_persist_period_sec);
827
+ if (!s.ok()) {
828
+ return s;
829
+ }
830
+ }
831
+
832
+ Status s = periodic_task_scheduler_.Register(
833
+ PeriodicTaskType::kFlushInfoLog,
834
+ periodic_task_functions_.at(PeriodicTaskType::kFlushInfoLog));
835
+
836
+ return s;
794
837
  #else
795
838
  return Status::OK();
796
839
  #endif // !ROCKSDB_LITE
@@ -798,9 +841,6 @@ Status DBImpl::StartPeriodicWorkScheduler() {
798
841
 
799
842
  Status DBImpl::RegisterRecordSeqnoTimeWorker() {
800
843
  #ifndef ROCKSDB_LITE
801
- if (!periodic_work_scheduler_) {
802
- return Status::OK();
803
- }
804
844
  uint64_t min_time_duration = std::numeric_limits<uint64_t>::max();
805
845
  uint64_t max_time_duration = std::numeric_limits<uint64_t>::min();
806
846
  {
@@ -828,26 +868,13 @@ Status DBImpl::RegisterRecordSeqnoTimeWorker() {
828
868
  }
829
869
 
830
870
  Status s;
831
- if (seqno_time_cadence != record_seqno_time_cadence_) {
832
- if (seqno_time_cadence == 0) {
833
- periodic_work_scheduler_->UnregisterRecordSeqnoTimeWorker(this);
834
- } else {
835
- s = periodic_work_scheduler_->RegisterRecordSeqnoTimeWorker(
836
- this, seqno_time_cadence);
837
- }
838
-
839
- if (s.ok()) {
840
- record_seqno_time_cadence_ = seqno_time_cadence;
841
- }
842
-
843
- if (s.IsNotSupported()) {
844
- // TODO: Fix the timer cannot cancel and re-add the same task
845
- ROCKS_LOG_WARN(
846
- immutable_db_options_.info_log,
847
- "Updating seqno to time worker cadence is not supported yet, to make "
848
- "the change effective, please reopen the DB instance.");
849
- s = Status::OK();
850
- }
871
+ if (seqno_time_cadence == 0) {
872
+ s = periodic_task_scheduler_.Unregister(PeriodicTaskType::kRecordSeqnoTime);
873
+ } else {
874
+ s = periodic_task_scheduler_.Register(
875
+ PeriodicTaskType::kRecordSeqnoTime,
876
+ periodic_task_functions_.at(PeriodicTaskType::kRecordSeqnoTime),
877
+ seqno_time_cadence);
851
878
  }
852
879
 
853
880
  return s;
@@ -1087,6 +1114,10 @@ void DBImpl::DumpStats() {
1087
1114
  PrintStatistics();
1088
1115
  }
1089
1116
 
1117
+ // Periodically flush info log out of application buffer at a low frequency.
1118
+ // This improves debuggability in case of RocksDB hanging since it ensures the
1119
+ // log messages leading up to the hang will eventually become visible in the
1120
+ // log.
1090
1121
  void DBImpl::FlushInfoLog() {
1091
1122
  if (shutdown_initiated_) {
1092
1123
  return;
@@ -1279,22 +1310,36 @@ Status DBImpl::SetDBOptions(
1279
1310
  MaybeScheduleFlushOrCompaction();
1280
1311
  }
1281
1312
 
1282
- if (new_options.stats_dump_period_sec !=
1283
- mutable_db_options_.stats_dump_period_sec ||
1284
- new_options.stats_persist_period_sec !=
1285
- mutable_db_options_.stats_persist_period_sec) {
1286
- mutex_.Unlock();
1287
- periodic_work_scheduler_->Unregister(this);
1288
- s = periodic_work_scheduler_->Register(
1289
- this, new_options.stats_dump_period_sec,
1290
- new_options.stats_persist_period_sec);
1291
- mutex_.Lock();
1313
+ mutex_.Unlock();
1314
+ if (new_options.stats_dump_period_sec == 0) {
1315
+ s = periodic_task_scheduler_.Unregister(PeriodicTaskType::kDumpStats);
1316
+ } else {
1317
+ s = periodic_task_scheduler_.Register(
1318
+ PeriodicTaskType::kDumpStats,
1319
+ periodic_task_functions_.at(PeriodicTaskType::kDumpStats),
1320
+ new_options.stats_dump_period_sec);
1292
1321
  }
1293
1322
  if (new_options.max_total_wal_size !=
1294
1323
  mutable_db_options_.max_total_wal_size) {
1295
1324
  max_total_wal_size_.store(new_options.max_total_wal_size,
1296
1325
  std::memory_order_release);
1297
1326
  }
1327
+ if (s.ok()) {
1328
+ if (new_options.stats_persist_period_sec == 0) {
1329
+ s = periodic_task_scheduler_.Unregister(
1330
+ PeriodicTaskType::kPersistStats);
1331
+ } else {
1332
+ s = periodic_task_scheduler_.Register(
1333
+ PeriodicTaskType::kPersistStats,
1334
+ periodic_task_functions_.at(PeriodicTaskType::kPersistStats),
1335
+ new_options.stats_persist_period_sec);
1336
+ }
1337
+ }
1338
+ mutex_.Lock();
1339
+ if (!s.ok()) {
1340
+ return s;
1341
+ }
1342
+
1298
1343
  write_controller_.set_max_delayed_write_rate(
1299
1344
  new_options.delayed_write_rate);
1300
1345
  table_cache_.get()->SetCapacity(new_options.max_open_files == -1
@@ -1393,11 +1438,11 @@ Status DBImpl::FlushWAL(bool sync) {
1393
1438
  // future writes
1394
1439
  IOStatusCheck(io_s);
1395
1440
  // whether sync or not, we should abort the rest of function upon error
1396
- return std::move(io_s);
1441
+ return static_cast<Status>(io_s);
1397
1442
  }
1398
1443
  if (!sync) {
1399
1444
  ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "FlushWAL sync=false");
1400
- return std::move(io_s);
1445
+ return static_cast<Status>(io_s);
1401
1446
  }
1402
1447
  }
1403
1448
  if (!sync) {
@@ -1494,8 +1539,8 @@ Status DBImpl::SyncWAL() {
1494
1539
  Status DBImpl::ApplyWALToManifest(VersionEdit* synced_wals) {
1495
1540
  // not empty, write to MANIFEST.
1496
1541
  mutex_.AssertHeld();
1497
- Status status =
1498
- versions_->LogAndApplyToDefaultColumnFamily(synced_wals, &mutex_);
1542
+ Status status = versions_->LogAndApplyToDefaultColumnFamily(
1543
+ synced_wals, &mutex_, directories_.GetDbDir());
1499
1544
  if (!status.ok() && versions_->io_status().IsIOError()) {
1500
1545
  status = error_handler_.SetBGError(versions_->io_status(),
1501
1546
  BackgroundErrorReason::kManifestWrite);
@@ -1506,7 +1551,7 @@ Status DBImpl::ApplyWALToManifest(VersionEdit* synced_wals) {
1506
1551
  Status DBImpl::LockWAL() {
1507
1552
  log_write_mutex_.Lock();
1508
1553
  auto cur_log_writer = logs_.back().writer;
1509
- auto status = cur_log_writer->WriteBuffer();
1554
+ IOStatus status = cur_log_writer->WriteBuffer();
1510
1555
  if (!status.ok()) {
1511
1556
  ROCKS_LOG_ERROR(immutable_db_options_.info_log, "WAL flush error %s",
1512
1557
  status.ToString().c_str());
@@ -1514,7 +1559,7 @@ Status DBImpl::LockWAL() {
1514
1559
  // future writes
1515
1560
  WriteStatusCheck(status);
1516
1561
  }
1517
- return std::move(status);
1562
+ return static_cast<Status>(status);
1518
1563
  }
1519
1564
 
1520
1565
  Status DBImpl::UnlockWAL() {
@@ -1532,20 +1577,28 @@ void DBImpl::MarkLogsSynced(uint64_t up_to, bool synced_dir,
1532
1577
  auto& wal = *it;
1533
1578
  assert(wal.IsSyncing());
1534
1579
 
1535
- if (logs_.size() > 1) {
1580
+ if (wal.number < logs_.back().number) {
1581
+ // Inactive WAL
1536
1582
  if (immutable_db_options_.track_and_verify_wals_in_manifest &&
1537
1583
  wal.GetPreSyncSize() > 0) {
1538
1584
  synced_wals->AddWal(wal.number, WalMetadata(wal.GetPreSyncSize()));
1539
1585
  }
1540
- logs_to_free_.push_back(wal.ReleaseWriter());
1541
- it = logs_.erase(it);
1586
+ if (wal.GetPreSyncSize() == wal.writer->file()->GetFlushedSize()) {
1587
+ // Fully synced
1588
+ logs_to_free_.push_back(wal.ReleaseWriter());
1589
+ it = logs_.erase(it);
1590
+ } else {
1591
+ assert(wal.GetPreSyncSize() < wal.writer->file()->GetFlushedSize());
1592
+ wal.FinishSync();
1593
+ ++it;
1594
+ }
1542
1595
  } else {
1596
+ assert(wal.number == logs_.back().number);
1597
+ // Active WAL
1543
1598
  wal.FinishSync();
1544
1599
  ++it;
1545
1600
  }
1546
1601
  }
1547
- assert(logs_.empty() || logs_[0].number > up_to ||
1548
- (logs_.size() == 1 && !logs_[0].IsSyncing()));
1549
1602
  log_sync_cv_.SignalAll();
1550
1603
  }
1551
1604
 
@@ -1593,7 +1646,6 @@ Status DBImpl::GetFullHistoryTsLow(ColumnFamilyHandle* column_family,
1593
1646
 
1594
1647
  InternalIterator* DBImpl::NewInternalIterator(const ReadOptions& read_options,
1595
1648
  Arena* arena,
1596
- RangeDelAggregator* range_del_agg,
1597
1649
  SequenceNumber sequence,
1598
1650
  ColumnFamilyHandle* column_family,
1599
1651
  bool allow_unprepared_value) {
@@ -1608,8 +1660,8 @@ InternalIterator* DBImpl::NewInternalIterator(const ReadOptions& read_options,
1608
1660
  mutex_.Lock();
1609
1661
  SuperVersion* super_version = cfd->GetSuperVersion()->Ref();
1610
1662
  mutex_.Unlock();
1611
- return NewInternalIterator(read_options, cfd, super_version, arena,
1612
- range_del_agg, sequence, allow_unprepared_value);
1663
+ return NewInternalIterator(read_options, cfd, super_version, arena, sequence,
1664
+ allow_unprepared_value);
1613
1665
  }
1614
1666
 
1615
1667
  void DBImpl::SchedulePurge() {
@@ -1740,48 +1792,53 @@ static void CleanupGetMergeOperandsState(void* arg1, void* /*arg2*/) {
1740
1792
 
1741
1793
  } // namespace
1742
1794
 
1743
- InternalIterator* DBImpl::NewInternalIterator(const ReadOptions& read_options,
1744
- ColumnFamilyData* cfd,
1745
- SuperVersion* super_version,
1746
- Arena* arena,
1747
- RangeDelAggregator* range_del_agg,
1748
- SequenceNumber sequence,
1749
- bool allow_unprepared_value) {
1795
+ InternalIterator* DBImpl::NewInternalIterator(
1796
+ const ReadOptions& read_options, ColumnFamilyData* cfd,
1797
+ SuperVersion* super_version, Arena* arena, SequenceNumber sequence,
1798
+ bool allow_unprepared_value, ArenaWrappedDBIter* db_iter) {
1750
1799
  InternalIterator* internal_iter;
1751
1800
  assert(arena != nullptr);
1752
- assert(range_del_agg != nullptr);
1753
1801
  // Need to create internal iterator from the arena.
1754
1802
  MergeIteratorBuilder merge_iter_builder(
1755
1803
  &cfd->internal_comparator(), arena,
1756
1804
  !read_options.total_order_seek &&
1757
1805
  super_version->mutable_cf_options.prefix_extractor != nullptr);
1758
- // Collect iterator for mutable mem
1759
- merge_iter_builder.AddIterator(
1760
- super_version->mem->NewIterator(read_options, arena));
1761
- std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter;
1806
+ // Collect iterator for mutable memtable
1807
+ auto mem_iter = super_version->mem->NewIterator(read_options, arena);
1762
1808
  Status s;
1763
1809
  if (!read_options.ignore_range_deletions) {
1764
- range_del_iter.reset(super_version->mem->NewRangeTombstoneIterator(
1765
- read_options, sequence, false /* immutable_memtable */));
1766
- range_del_agg->AddTombstones(std::move(range_del_iter));
1810
+ TruncatedRangeDelIterator* mem_tombstone_iter = nullptr;
1811
+ auto range_del_iter = super_version->mem->NewRangeTombstoneIterator(
1812
+ read_options, sequence, false /* immutable_memtable */);
1813
+ if (range_del_iter == nullptr || range_del_iter->empty()) {
1814
+ delete range_del_iter;
1815
+ } else {
1816
+ mem_tombstone_iter = new TruncatedRangeDelIterator(
1817
+ std::unique_ptr<FragmentedRangeTombstoneIterator>(range_del_iter),
1818
+ &cfd->ioptions()->internal_comparator, nullptr /* smallest */,
1819
+ nullptr /* largest */);
1820
+ }
1821
+ merge_iter_builder.AddPointAndTombstoneIterator(mem_iter,
1822
+ mem_tombstone_iter);
1823
+ } else {
1824
+ merge_iter_builder.AddIterator(mem_iter);
1767
1825
  }
1826
+
1768
1827
  // Collect all needed child iterators for immutable memtables
1769
1828
  if (s.ok()) {
1770
- super_version->imm->AddIterators(read_options, &merge_iter_builder);
1771
- if (!read_options.ignore_range_deletions) {
1772
- s = super_version->imm->AddRangeTombstoneIterators(read_options, arena,
1773
- range_del_agg);
1774
- }
1829
+ super_version->imm->AddIterators(read_options, &merge_iter_builder,
1830
+ !read_options.ignore_range_deletions);
1775
1831
  }
1776
1832
  TEST_SYNC_POINT_CALLBACK("DBImpl::NewInternalIterator:StatusCallback", &s);
1777
1833
  if (s.ok()) {
1778
1834
  // Collect iterators for files in L0 - Ln
1779
1835
  if (read_options.read_tier != kMemtableTier) {
1780
1836
  super_version->current->AddIterators(read_options, file_options_,
1781
- &merge_iter_builder, range_del_agg,
1837
+ &merge_iter_builder,
1782
1838
  allow_unprepared_value);
1783
1839
  }
1784
- internal_iter = merge_iter_builder.Finish();
1840
+ internal_iter = merge_iter_builder.Finish(
1841
+ read_options.ignore_range_deletions ? nullptr : db_iter);
1785
1842
  SuperVersionHandle* cleanup = new SuperVersionHandle(
1786
1843
  this, &mutex_, super_version,
1787
1844
  read_options.background_purge_on_iterator_cleanup ||
@@ -3035,12 +3092,7 @@ Status DBImpl::CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options,
3035
3092
  } // InstrumentedMutexLock l(&mutex_)
3036
3093
 
3037
3094
  if (cf_options.preclude_last_level_data_seconds > 0) {
3038
- // TODO(zjay): Fix the timer issue and re-enable this.
3039
- ROCKS_LOG_ERROR(
3040
- immutable_db_options_.info_log,
3041
- "Creating column family with `preclude_last_level_data_seconds` needs "
3042
- "to restart DB to take effect");
3043
- // s = RegisterRecordSeqnoTimeWorker();
3095
+ s = RegisterRecordSeqnoTimeWorker();
3044
3096
  }
3045
3097
  sv_context.Clean();
3046
3098
  // this is outside the mutex
@@ -3106,7 +3158,7 @@ Status DBImpl::DropColumnFamilyImpl(ColumnFamilyHandle* column_family) {
3106
3158
  WriteThread::Writer w;
3107
3159
  write_thread_.EnterUnbatched(&w, &mutex_);
3108
3160
  s = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), &edit,
3109
- &mutex_);
3161
+ &mutex_, directories_.GetDbDir());
3110
3162
  write_thread_.ExitUnbatched(&w);
3111
3163
  }
3112
3164
  if (s.ok()) {
@@ -3311,9 +3363,8 @@ ArenaWrappedDBIter* DBImpl::NewIteratorImpl(const ReadOptions& read_options,
3311
3363
  read_options.snapshot != nullptr ? false : allow_refresh);
3312
3364
 
3313
3365
  InternalIterator* internal_iter = NewInternalIterator(
3314
- db_iter->GetReadOptions(), cfd, sv, db_iter->GetArena(),
3315
- db_iter->GetRangeDelAggregator(), snapshot,
3316
- /* allow_unprepared_value */ true);
3366
+ db_iter->GetReadOptions(), cfd, sv, db_iter->GetArena(), snapshot,
3367
+ /* allow_unprepared_value */ true, db_iter);
3317
3368
  db_iter->SetIterUnderDBIter(internal_iter);
3318
3369
 
3319
3370
  return db_iter;
@@ -32,6 +32,7 @@
32
32
  #include "db/log_writer.h"
33
33
  #include "db/logs_with_prep_tracker.h"
34
34
  #include "db/memtable_list.h"
35
+ #include "db/periodic_task_scheduler.h"
35
36
  #include "db/post_memtable_callback.h"
36
37
  #include "db/pre_release_callback.h"
37
38
  #include "db/range_del_aggregator.h"
@@ -75,10 +76,6 @@ class ArenaWrappedDBIter;
75
76
  class InMemoryStatsHistoryIterator;
76
77
  class MemTable;
77
78
  class PersistentStatsHistoryIterator;
78
- class PeriodicWorkScheduler;
79
- #ifndef NDEBUG
80
- class PeriodicWorkTestScheduler;
81
- #endif // !NDEBUG
82
79
  class TableCache;
83
80
  class TaskLimiterToken;
84
81
  class Version;
@@ -742,13 +739,29 @@ class DBImpl : public DB {
742
739
  // the value and so will require PrepareValue() to be called before value();
743
740
  // allow_unprepared_value = false is convenient when this optimization is not
744
741
  // useful, e.g. when reading the whole column family.
742
+ //
743
+ // read_options.ignore_range_deletions determines whether range tombstones are
744
+ // processed in the returned iterator internally, i.e., whether range
745
+ // tombstone covered keys are in this iterator's output.
745
746
  // @param read_options Must outlive the returned iterator.
746
747
  InternalIterator* NewInternalIterator(
747
- const ReadOptions& read_options, Arena* arena,
748
- RangeDelAggregator* range_del_agg, SequenceNumber sequence,
748
+ const ReadOptions& read_options, Arena* arena, SequenceNumber sequence,
749
749
  ColumnFamilyHandle* column_family = nullptr,
750
750
  bool allow_unprepared_value = false);
751
751
 
752
+ // Note: to support DB iterator refresh, memtable range tombstones in the
753
+ // underlying merging iterator need to be refreshed. If db_iter is not
754
+ // nullptr, db_iter->SetMemtableRangetombstoneIter() is called with the
755
+ // memtable range tombstone iterator used by the underlying merging iterator.
756
+ // This range tombstone iterator can be refreshed later by db_iter.
757
+ // @param read_options Must outlive the returned iterator.
758
+ InternalIterator* NewInternalIterator(const ReadOptions& read_options,
759
+ ColumnFamilyData* cfd,
760
+ SuperVersion* super_version,
761
+ Arena* arena, SequenceNumber sequence,
762
+ bool allow_unprepared_value,
763
+ ArenaWrappedDBIter* db_iter = nullptr);
764
+
752
765
  LogsWithPrepTracker* logs_with_prep_tracker() {
753
766
  return &logs_with_prep_tracker_;
754
767
  }
@@ -871,15 +884,6 @@ class DBImpl : public DB {
871
884
 
872
885
  const WriteController& write_controller() { return write_controller_; }
873
886
 
874
- // @param read_options Must outlive the returned iterator.
875
- InternalIterator* NewInternalIterator(const ReadOptions& read_options,
876
- ColumnFamilyData* cfd,
877
- SuperVersion* super_version,
878
- Arena* arena,
879
- RangeDelAggregator* range_del_agg,
880
- SequenceNumber sequence,
881
- bool allow_unprepared_value);
882
-
883
887
  // hollow transactions shell used for recovery.
884
888
  // these will then be passed to TransactionDB so that
885
889
  // locks can be reacquired before writing can resume.
@@ -1147,7 +1151,7 @@ class DBImpl : public DB {
1147
1151
  int TEST_BGCompactionsAllowed() const;
1148
1152
  int TEST_BGFlushesAllowed() const;
1149
1153
  size_t TEST_GetWalPreallocateBlockSize(uint64_t write_buffer_size) const;
1150
- void TEST_WaitForPeridicWorkerRun(std::function<void()> callback) const;
1154
+ void TEST_WaitForPeridicTaskRun(std::function<void()> callback) const;
1151
1155
  SeqnoToTimeMapping TEST_GetSeqnoToTimeMapping() const;
1152
1156
  size_t TEST_EstimateInMemoryStatsHistorySize() const;
1153
1157
 
@@ -1162,7 +1166,7 @@ class DBImpl : public DB {
1162
1166
  }
1163
1167
 
1164
1168
  #ifndef ROCKSDB_LITE
1165
- PeriodicWorkTestScheduler* TEST_GetPeriodicWorkScheduler() const;
1169
+ const PeriodicTaskScheduler& TEST_GetPeriodicTaskScheduler() const;
1166
1170
  #endif // !ROCKSDB_LITE
1167
1171
 
1168
1172
  #endif // NDEBUG
@@ -1390,7 +1394,7 @@ class DBImpl : public DB {
1390
1394
  void NotifyOnExternalFileIngested(
1391
1395
  ColumnFamilyData* cfd, const ExternalSstFileIngestionJob& ingestion_job);
1392
1396
 
1393
- Status FlushForGetLiveFiles();
1397
+ virtual Status FlushForGetLiveFiles();
1394
1398
  #endif // !ROCKSDB_LITE
1395
1399
 
1396
1400
  void NewThreadStatusCfInfo(ColumnFamilyData* cfd) const;
@@ -2069,7 +2073,7 @@ class DBImpl : public DB {
2069
2073
  LogBuffer* log_buffer);
2070
2074
 
2071
2075
  // Schedule background tasks
2072
- Status StartPeriodicWorkScheduler();
2076
+ Status StartPeriodicTaskScheduler();
2073
2077
 
2074
2078
  Status RegisterRecordSeqnoTimeWorker();
2075
2079
 
@@ -2175,11 +2179,6 @@ class DBImpl : public DB {
2175
2179
  IOStatus CreateWAL(uint64_t log_file_num, uint64_t recycle_log_number,
2176
2180
  size_t preallocate_block_size, log::Writer** new_log);
2177
2181
 
2178
- // Verify SST file unique id between Manifest and table properties to make
2179
- // sure they're the same. Currently only used during DB open when
2180
- // `verify_sst_unique_id_in_manifest = true`.
2181
- Status VerifySstUniqueIdInManifest();
2182
-
2183
2182
  // Validate self-consistency of DB options
2184
2183
  static Status ValidateOptions(const DBOptions& db_options);
2185
2184
  // Validate self-consistency of DB options and its consistency with cf options
@@ -2611,14 +2610,11 @@ class DBImpl : public DB {
2611
2610
 
2612
2611
  #ifndef ROCKSDB_LITE
2613
2612
  // Scheduler to run DumpStats(), PersistStats(), and FlushInfoLog().
2614
- // Currently, it always use a global instance from
2615
- // PeriodicWorkScheduler::Default(). Only in unittest, it can be overrided by
2616
- // PeriodicWorkTestScheduler.
2617
- PeriodicWorkScheduler* periodic_work_scheduler_;
2618
-
2619
- // Current cadence of the periodic worker for recording sequence number to
2620
- // time.
2621
- uint64_t record_seqno_time_cadence_ = 0;
2613
+ // Currently, internally it has a global timer instance for running the tasks.
2614
+ PeriodicTaskScheduler periodic_task_scheduler_;
2615
+
2616
+ // It contains the implementations for each periodic task.
2617
+ std::map<PeriodicTaskType, const PeriodicTaskFunc> periodic_task_functions_;
2622
2618
  #endif
2623
2619
 
2624
2620
  // When set, we use a separate queue for writes that don't write to memtable.
@@ -988,7 +988,7 @@ Status DBImpl::IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd,
988
988
  }
989
989
 
990
990
  Status s = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
991
- &edit, &mutex_);
991
+ &edit, &mutex_, directories_.GetDbDir());
992
992
  if (!s.ok()) {
993
993
  return s;
994
994
  }