@nxtedition/rocksdb 13.5.12 → 14.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232)
  1. package/binding.cc +33 -2
  2. package/binding.gyp +2 -2
  3. package/chained-batch.js +9 -16
  4. package/deps/rocksdb/rocksdb/BUCK +18 -1
  5. package/deps/rocksdb/rocksdb/CMakeLists.txt +10 -3
  6. package/deps/rocksdb/rocksdb/Makefile +20 -9
  7. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +90 -13
  8. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +88 -75
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.h +44 -36
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +184 -148
  11. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +5 -11
  12. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +116 -47
  13. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +3 -6
  15. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +1 -1
  16. package/deps/rocksdb/rocksdb/db/builder.cc +4 -2
  17. package/deps/rocksdb/rocksdb/db/c.cc +207 -0
  18. package/deps/rocksdb/rocksdb/db/c_test.c +72 -0
  19. package/deps/rocksdb/rocksdb/db/column_family.cc +3 -2
  20. package/deps/rocksdb/rocksdb/db/column_family.h +5 -0
  21. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -0
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +2 -0
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +51 -38
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +29 -12
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +5 -10
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +566 -366
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +131 -4
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +1 -0
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +4 -4
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +13 -14
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +12 -7
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -10
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +97 -76
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +11 -14
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +1 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +8 -0
  38. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +16 -3
  39. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +1 -0
  40. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +448 -1
  41. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +22 -20
  42. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +4 -1
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +5 -5
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +7 -3
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -1
  46. package/deps/rocksdb/rocksdb/db/db_iter.cc +104 -0
  47. package/deps/rocksdb/rocksdb/db/db_iter.h +4 -11
  48. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +331 -58
  49. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +129 -0
  50. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +64 -0
  51. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +40 -0
  52. package/deps/rocksdb/rocksdb/db/db_test2.cc +25 -15
  53. package/deps/rocksdb/rocksdb/db/db_test_util.cc +42 -24
  54. package/deps/rocksdb/rocksdb/db/db_test_util.h +29 -14
  55. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +69 -36
  56. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +0 -1
  57. package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
  58. package/deps/rocksdb/rocksdb/db/experimental.cc +5 -4
  59. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +8 -1
  60. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +275 -79
  61. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +23 -5
  62. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +591 -175
  63. package/deps/rocksdb/rocksdb/db/flush_job.cc +3 -4
  64. package/deps/rocksdb/rocksdb/db/log_reader.cc +5 -2
  65. package/deps/rocksdb/rocksdb/db/memtable.cc +84 -35
  66. package/deps/rocksdb/rocksdb/db/memtable.h +39 -34
  67. package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -0
  68. package/deps/rocksdb/rocksdb/db/merge_operator.cc +1 -1
  69. package/deps/rocksdb/rocksdb/db/multi_scan.cc +11 -5
  70. package/deps/rocksdb/rocksdb/db/version_edit.cc +1 -1
  71. package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
  72. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +34 -14
  73. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +28 -5
  74. package/deps/rocksdb/rocksdb/db/version_set.cc +159 -14
  75. package/deps/rocksdb/rocksdb/db/version_set.h +2 -0
  76. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -1
  77. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +60 -0
  78. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +16 -1
  79. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.h +75 -10
  80. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.cc +28 -0
  81. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.h +2 -0
  82. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -1
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +50 -2
  84. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +57 -0
  85. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +0 -4
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +266 -35
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +0 -6
  89. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +18 -2
  90. package/deps/rocksdb/rocksdb/env/env.cc +12 -0
  91. package/deps/rocksdb/rocksdb/env/env_test.cc +18 -0
  92. package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +2 -0
  93. package/deps/rocksdb/rocksdb/env/fs_posix.cc +9 -5
  94. package/deps/rocksdb/rocksdb/env/io_posix.cc +4 -2
  95. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +19 -0
  96. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +33 -31
  97. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +42 -9
  98. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +93 -0
  99. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +43 -49
  100. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +4 -3
  101. package/deps/rocksdb/rocksdb/include/rocksdb/compression_type.h +8 -6
  102. package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +487 -0
  103. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +11 -12
  104. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +135 -1
  105. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -0
  106. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +12 -0
  107. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -1
  108. package/deps/rocksdb/rocksdb/include/rocksdb/ldb_tool.h +8 -0
  109. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +12 -8
  110. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +3 -0
  111. package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +19 -9
  112. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +219 -24
  113. package/deps/rocksdb/rocksdb/include/rocksdb/point_lock_bench_tool.h +14 -0
  114. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +2 -2
  115. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +1 -1
  116. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +7 -0
  117. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +16 -0
  118. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +16 -4
  119. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +13 -0
  120. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +4 -0
  121. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +0 -2
  122. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +45 -0
  123. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +1 -1
  124. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +1 -1
  125. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +6 -1
  126. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
  127. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  128. package/deps/rocksdb/rocksdb/memory/memory_allocator_impl.h +3 -3
  129. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +77 -51
  130. package/deps/rocksdb/rocksdb/memtable/skiplist.h +10 -13
  131. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +16 -7
  132. package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +9 -4
  133. package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +2 -0
  134. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +6 -0
  135. package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -1
  136. package/deps/rocksdb/rocksdb/options/cf_options.h +6 -2
  137. package/deps/rocksdb/rocksdb/options/options.cc +2 -0
  138. package/deps/rocksdb/rocksdb/options/options_helper.cc +9 -8
  139. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -5
  140. package/deps/rocksdb/rocksdb/port/mmap.cc +1 -1
  141. package/deps/rocksdb/rocksdb/port/win/xpress_win.cc +51 -0
  142. package/deps/rocksdb/rocksdb/port/win/xpress_win.h +4 -0
  143. package/deps/rocksdb/rocksdb/src.mk +8 -2
  144. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1125 -765
  145. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +35 -24
  146. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +29 -4
  147. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +732 -256
  148. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +225 -16
  149. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +102 -26
  150. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +1 -1
  151. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +2 -75
  152. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +433 -141
  153. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +2 -0
  154. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +17 -10
  155. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy_impl.h +20 -0
  156. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +112 -85
  157. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +191 -36
  158. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +2 -2
  159. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
  160. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +108 -31
  161. package/deps/rocksdb/rocksdb/table/external_table.cc +7 -3
  162. package/deps/rocksdb/rocksdb/table/format.cc +6 -12
  163. package/deps/rocksdb/rocksdb/table/format.h +10 -0
  164. package/deps/rocksdb/rocksdb/table/internal_iterator.h +1 -1
  165. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +1 -1
  166. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -1
  167. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +5 -0
  168. package/deps/rocksdb/rocksdb/table/multiget_context.h +3 -1
  169. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +118 -46
  170. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +9 -8
  171. package/deps/rocksdb/rocksdb/table/table_builder.h +5 -0
  172. package/deps/rocksdb/rocksdb/table/table_properties.cc +16 -0
  173. package/deps/rocksdb/rocksdb/table/table_test.cc +1540 -155
  174. package/deps/rocksdb/rocksdb/test_util/testutil.h +21 -5
  175. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +26 -5
  176. package/deps/rocksdb/rocksdb/tools/ldb.cc +1 -2
  177. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +2 -0
  178. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -3
  179. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +133 -165
  180. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +173 -64
  181. package/deps/rocksdb/rocksdb/util/aligned_buffer.h +69 -0
  182. package/deps/rocksdb/rocksdb/util/atomic.h +6 -0
  183. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +29 -20
  184. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +10 -6
  185. package/deps/rocksdb/rocksdb/util/bit_fields.h +338 -0
  186. package/deps/rocksdb/rocksdb/util/coding.h +3 -3
  187. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -2
  188. package/deps/rocksdb/rocksdb/util/compression.cc +777 -82
  189. package/deps/rocksdb/rocksdb/util/compression.h +5 -0
  190. package/deps/rocksdb/rocksdb/util/compression_test.cc +5 -3
  191. package/deps/rocksdb/rocksdb/util/dynamic_bloom.cc +2 -2
  192. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +15 -14
  193. package/deps/rocksdb/rocksdb/util/interval_test.cc +102 -0
  194. package/deps/rocksdb/rocksdb/util/semaphore.h +164 -0
  195. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +10 -6
  196. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -2
  197. package/deps/rocksdb/rocksdb/util/slice_test.cc +136 -0
  198. package/deps/rocksdb/rocksdb/util/status.cc +1 -0
  199. package/deps/rocksdb/rocksdb/util/string_util.cc +2 -16
  200. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +1 -1
  201. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -1
  202. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +7 -4
  203. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +35 -14
  204. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc +2 -0
  205. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc +5 -2
  206. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/any_lock_manager_test.h +244 -0
  207. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench.cc +18 -0
  208. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench_tool.cc +159 -0
  209. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +1244 -161
  210. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +66 -12
  211. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_stress_test.cc +103 -0
  212. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +1275 -8
  213. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +40 -262
  214. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test_common.h +78 -0
  215. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_validation_test_runner.h +469 -0
  216. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -6
  217. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +4 -0
  218. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +9 -1
  219. package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +18 -9
  220. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +2 -0
  221. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc +2 -1
  222. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +72 -44
  223. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +92 -15
  224. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +6 -20
  225. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +143 -112
  226. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +23 -16
  227. package/index.js +3 -3
  228. package/package.json +1 -1
  229. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  230. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  231. package/util.h +38 -12
  232. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.cc +0 -17
@@ -1069,7 +1069,7 @@ void DBImpl::DumpStats() {
1069
1069
  {
1070
1070
  InstrumentedMutexLock l(&mutex_);
1071
1071
  for (auto cfd : versions_->GetRefedColumnFamilySet()) {
1072
- if (!cfd->initialized()) {
1072
+ if (!cfd->initialized() || cfd->IsDropped()) {
1073
1073
  continue;
1074
1074
  }
1075
1075
 
@@ -1245,13 +1245,11 @@ Status DBImpl::SetOptions(
1245
1245
  WriteOptionsFile(write_options, true /*db_mutex_already_held*/);
1246
1246
  bg_cv_.SignalAll();
1247
1247
 
1248
- #if __cplusplus >= 202002L
1249
1248
  assert(new_options_copy == cfd->GetLatestMutableCFOptions());
1250
1249
  assert(cfd->GetLatestMutableCFOptions() ==
1251
1250
  cfd->GetCurrentMutableCFOptions());
1252
1251
  assert(cfd->GetCurrentMutableCFOptions() ==
1253
1252
  cfd->current()->GetMutableCFOptions());
1254
- #endif
1255
1253
  }
1256
1254
  }
1257
1255
  sv_context.Clean();
@@ -3835,7 +3833,7 @@ bool DBImpl::KeyMayExist(const ReadOptions& read_options,
3835
3833
 
3836
3834
  std::unique_ptr<MultiScan> DBImpl::NewMultiScan(
3837
3835
  const ReadOptions& _read_options, ColumnFamilyHandle* column_family,
3838
- const std::vector<ScanOptions>& scan_opts) {
3836
+ const MultiScanArgs& scan_opts) {
3839
3837
  std::unique_ptr<MultiScan> ms_iter = std::make_unique<MultiScan>(
3840
3838
  _read_options, scan_opts, this, column_family);
3841
3839
  return ms_iter;
@@ -4345,7 +4343,7 @@ void DBImpl::ReleaseSnapshot(const Snapshot* s) {
4345
4343
  CfdList cf_scheduled;
4346
4344
  if (oldest_snapshot > bottommost_files_mark_threshold_) {
4347
4345
  for (auto* cfd : *versions_->GetColumnFamilySet()) {
4348
- if (!cfd->ioptions().allow_ingest_behind) {
4346
+ if (!cfd->AllowIngestBehind()) {
4349
4347
  cfd->current()->storage_info()->UpdateOldestSnapshot(
4350
4348
  oldest_snapshot, /*allow_ingest_behind=*/false);
4351
4349
  if (!cfd->current()
@@ -4365,8 +4363,7 @@ void DBImpl::ReleaseSnapshot(const Snapshot* s) {
4365
4363
  // inaccurate.
4366
4364
  SequenceNumber new_bottommost_files_mark_threshold = kMaxSequenceNumber;
4367
4365
  for (auto* cfd : *versions_->GetColumnFamilySet()) {
4368
- if (CfdListContains(cf_scheduled, cfd) ||
4369
- cfd->ioptions().allow_ingest_behind) {
4366
+ if (CfdListContains(cf_scheduled, cfd) || cfd->AllowIngestBehind()) {
4370
4367
  continue;
4371
4368
  }
4372
4369
  new_bottommost_files_mark_threshold = std::min(
@@ -5761,10 +5758,6 @@ Status DBImpl::IngestExternalFiles(
5761
5758
  for (const auto& arg : args) {
5762
5759
  const IngestExternalFileOptions& ingest_opts = arg.options;
5763
5760
  if (ingest_opts.ingest_behind) {
5764
- if (!immutable_db_options_.allow_ingest_behind) {
5765
- return Status::InvalidArgument(
5766
- "can't ingest_behind file in DB with allow_ingest_behind=false");
5767
- }
5768
5761
  auto ucmp = arg.column_family->GetComparator();
5769
5762
  assert(ucmp);
5770
5763
  if (ucmp->timestamp_size() > 0) {
@@ -5772,6 +5765,14 @@ Status DBImpl::IngestExternalFiles(
5772
5765
  "Column family with user-defined "
5773
5766
  "timestamps enabled doesn't support ingest behind.");
5774
5767
  }
5768
+
5769
+ if (!static_cast<ColumnFamilyHandleImpl*>(arg.column_family)
5770
+ ->cfd()
5771
+ ->AllowIngestBehind()) {
5772
+ return Status::InvalidArgument(
5773
+ "Can't ingest_behind file in ColumnFamily %s with "
5774
+ "cf_allow_ingest_behind=false");
5775
+ }
5775
5776
  }
5776
5777
  if (arg.atomic_replace_range.has_value()) {
5777
5778
  if (ingest_opts.ingest_behind) {
@@ -6006,18 +6007,19 @@ Status DBImpl::IngestExternalFiles(
6006
6007
  // mutex when persisting MANIFEST file, and the snapshots taken during
6007
6008
  // that period will not be stable if VersionSet last seqno is updated
6008
6009
  // before LogAndApply.
6009
- int consumed_seqno_count =
6010
- ingestion_jobs[0].ConsumedSequenceNumbersCount();
6010
+ SequenceNumber max_assigned_seqno =
6011
+ ingestion_jobs[0].MaxAssignedSequenceNumber();
6011
6012
  for (size_t i = 1; i != num_cfs; ++i) {
6012
- consumed_seqno_count =
6013
- std::max(consumed_seqno_count,
6014
- ingestion_jobs[i].ConsumedSequenceNumbersCount());
6013
+ max_assigned_seqno = std::max(
6014
+ max_assigned_seqno, ingestion_jobs[i].MaxAssignedSequenceNumber());
6015
6015
  }
6016
- if (consumed_seqno_count > 0) {
6016
+ if (max_assigned_seqno > 0) {
6017
6017
  const SequenceNumber last_seqno = versions_->LastSequence();
6018
- versions_->SetLastAllocatedSequence(last_seqno + consumed_seqno_count);
6019
- versions_->SetLastPublishedSequence(last_seqno + consumed_seqno_count);
6020
- versions_->SetLastSequence(last_seqno + consumed_seqno_count);
6018
+ if (max_assigned_seqno > last_seqno) {
6019
+ versions_->SetLastAllocatedSequence(max_assigned_seqno);
6020
+ versions_->SetLastPublishedSequence(max_assigned_seqno);
6021
+ versions_->SetLastSequence(max_assigned_seqno);
6022
+ }
6021
6023
  }
6022
6024
  }
6023
6025
 
@@ -386,7 +386,7 @@ class DBImpl : public DB {
386
386
  using DB::NewMultiScan;
387
387
  std::unique_ptr<MultiScan> NewMultiScan(
388
388
  const ReadOptions& _read_options, ColumnFamilyHandle* column_family,
389
- const std::vector<ScanOptions>& scan_opts) override;
389
+ const MultiScanArgs& scan_opts) override;
390
390
 
391
391
  const Snapshot* GetSnapshot() override;
392
392
  void ReleaseSnapshot(const Snapshot* snapshot) override;
@@ -1388,6 +1388,9 @@ class DBImpl : public DB {
1388
1388
  // logs_, cur_wal_number_. Refer to the definition of each variable below for
1389
1389
  // more description.
1390
1390
  //
1391
+ // Protects access to most ColumnFamilyData methods, see more in comment for
1392
+ // each method.
1393
+ //
1391
1394
  // `mutex_` can be a hot lock in some workloads, so it deserves dedicated
1392
1395
  // cachelines.
1393
1396
  mutable CacheAlignedInstrumentedMutex mutex_;
@@ -1111,8 +1111,7 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
1111
1111
  cfd->NumberLevels() > 1) {
1112
1112
  // Always compact all files together.
1113
1113
  final_output_level = cfd->NumberLevels() - 1;
1114
- // if bottom most level is reserved
1115
- if (immutable_db_options_.allow_ingest_behind) {
1114
+ if (cfd->AllowIngestBehind()) {
1116
1115
  final_output_level--;
1117
1116
  }
1118
1117
  s = RunManualCompaction(cfd, ColumnFamilyData::kCompactAllLevels,
@@ -1460,7 +1459,7 @@ Status DBImpl::CompactFilesImpl(
1460
1459
  }
1461
1460
  }
1462
1461
 
1463
- if (cfd->ioptions().allow_ingest_behind &&
1462
+ if (cfd->AllowIngestBehind() &&
1464
1463
  output_level >= cfd->ioptions().num_levels - 1) {
1465
1464
  return Status::InvalidArgument(
1466
1465
  "Exceed the maximum output level defined by "
@@ -1500,7 +1499,7 @@ Status DBImpl::CompactFilesImpl(
1500
1499
 
1501
1500
  std::unique_ptr<Compaction> c;
1502
1501
  assert(cfd->compaction_picker());
1503
- c.reset(cfd->compaction_picker()->CompactFiles(
1502
+ c.reset(cfd->compaction_picker()->PickCompactionForCompactFiles(
1504
1503
  compact_options, input_files, output_level, version->storage_info(),
1505
1504
  cfd->GetLatestMutableCFOptions(), mutable_db_options_, output_path_id));
1506
1505
  // we already sanitized the set of input files and checked for conflicts
@@ -4155,6 +4154,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
4155
4154
  ->current()
4156
4155
  ->storage_info()
4157
4156
  ->MaxOutputLevel(
4157
+ c->immutable_options().cf_allow_ingest_behind ||
4158
4158
  immutable_db_options_.allow_ingest_behind)) &&
4159
4159
  env_->GetBackgroundThreads(Env::Priority::BOTTOM) > 0) {
4160
4160
  assert(thread_pri == Env::Priority::LOW);
@@ -4660,7 +4660,7 @@ void DBImpl::InstallSuperVersionAndScheduleWork(
4660
4660
  bottommost_files_mark_threshold_ = kMaxSequenceNumber;
4661
4661
  standalone_range_deletion_files_mark_threshold_ = kMaxSequenceNumber;
4662
4662
  for (auto* my_cfd : *versions_->GetColumnFamilySet()) {
4663
- if (!my_cfd->ioptions().allow_ingest_behind) {
4663
+ if (!my_cfd->AllowIngestBehind()) {
4664
4664
  bottommost_files_mark_threshold_ = std::min(
4665
4665
  bottommost_files_mark_threshold_,
4666
4666
  my_cfd->current()->storage_info()->bottommost_files_mark_threshold());
@@ -599,7 +599,7 @@ Status DBImpl::Recover(
599
599
  // allow_ingest_behind does not support Level Compaction,
600
600
  // and per_key_placement can have infinite compaction loop for Level
601
601
  // Compaction. Adjust to_level here just to be safe.
602
- if (cfd->ioptions().allow_ingest_behind ||
602
+ if (cfd->AllowIngestBehind() ||
603
603
  moptions.preclude_last_level_data_seconds > 0) {
604
604
  to_level -= 1;
605
605
  }
@@ -1755,8 +1755,12 @@ Status DBImpl::MaybeHandleStopReplayForCorruptionForInconsistency(
1755
1755
  ROCKS_LOG_ERROR(immutable_db_options_.info_log,
1756
1756
  "Column family inconsistency: SST file contains data"
1757
1757
  " beyond the point of corruption.");
1758
- status = Status::Corruption("SST file is ahead of WALs in CF " +
1759
- cfd->GetName());
1758
+ status = Status::Corruption(
1759
+ "Column family inconsistency: SST file contains data"
1760
+ " beyond the point of corruption in CF " +
1761
+ cfd->GetName() +
1762
+ ". WAL recovery stopped at corruption point, but SST files"
1763
+ " contain newer data.");
1760
1764
  return status;
1761
1765
  }
1762
1766
  }
@@ -879,7 +879,7 @@ Status DBImplSecondary::CompactWithoutInstallation(
879
879
  // input instead of recreating it in the remote worker
880
880
  std::unique_ptr<Compaction> c;
881
881
  assert(cfd->compaction_picker());
882
- c.reset(cfd->compaction_picker()->CompactFiles(
882
+ c.reset(cfd->compaction_picker()->PickCompactionForCompactFiles(
883
883
  comp_options, input_files, input.output_level, vstorage,
884
884
  cfd->GetLatestMutableCFOptions(), mutable_db_options_, 0,
885
885
  /*earliest_snapshot=*/job_context.snapshot_seqs.empty()
@@ -1565,11 +1565,115 @@ void DBIter::SetSavedKeyToSeekForPrevTarget(const Slice& target) {
1565
1565
  }
1566
1566
  }
1567
1567
 
1568
+ Status DBIter::ValidateScanOptions(const MultiScanArgs& multiscan_opts) const {
1569
+ if (multiscan_opts.empty()) {
1570
+ return Status::InvalidArgument("Empty MultiScanArgs");
1571
+ }
1572
+
1573
+ const std::vector<ScanOptions>& scan_opts = multiscan_opts.GetScanRanges();
1574
+ const bool has_limit = scan_opts.front().range.limit.has_value();
1575
+ if (!has_limit && scan_opts.size() > 1) {
1576
+ return Status::InvalidArgument("Scan has no upper bound");
1577
+ }
1578
+
1579
+ for (size_t i = 0; i < scan_opts.size(); ++i) {
1580
+ const auto& scan_range = scan_opts[i].range;
1581
+ if (!scan_range.start.has_value()) {
1582
+ return Status::InvalidArgument("Scan has no start key at index " +
1583
+ std::to_string(i));
1584
+ }
1585
+
1586
+ if (scan_range.limit.has_value()) {
1587
+ if (user_comparator_.CompareWithoutTimestamp(
1588
+ scan_range.start.value(), /*a_has_ts=*/false,
1589
+ scan_range.limit.value(), /*b_has_ts=*/false) >= 0) {
1590
+ return Status::InvalidArgument(
1591
+ "Scan start key is large or equal than limit at index " +
1592
+ std::to_string(i));
1593
+ }
1594
+ }
1595
+
1596
+ if (i > 0) {
1597
+ if (!scan_range.limit.has_value()) {
1598
+ // multiple scan without limit scan ranges
1599
+ return Status::InvalidArgument("Scan has no upper bound at index " +
1600
+ std::to_string(i));
1601
+ }
1602
+
1603
+ const auto& last_end_key = scan_opts[i - 1].range.limit.value();
1604
+ if (user_comparator_.CompareWithoutTimestamp(
1605
+ scan_range.start.value(), /*a_has_ts=*/false, last_end_key,
1606
+ /*b_has_ts=*/false) < 0) {
1607
+ return Status::InvalidArgument("Overlapping ranges at index " +
1608
+ std::to_string(i));
1609
+ }
1610
+ }
1611
+ }
1612
+ return Status::OK();
1613
+ }
1614
+
1615
+ void DBIter::Prepare(const MultiScanArgs& scan_opts) {
1616
+ status_ = ValidateScanOptions(scan_opts);
1617
+ if (!status_.ok()) {
1618
+ return;
1619
+ }
1620
+ std::optional<MultiScanArgs> new_scan_opts;
1621
+ new_scan_opts.emplace(scan_opts);
1622
+ scan_opts_.swap(new_scan_opts);
1623
+ scan_index_ = 0;
1624
+ if (!scan_opts.empty()) {
1625
+ iter_.Prepare(&scan_opts_.value());
1626
+ } else {
1627
+ iter_.Prepare(nullptr);
1628
+ }
1629
+ }
1630
+
1568
1631
  void DBIter::Seek(const Slice& target) {
1569
1632
  PERF_COUNTER_ADD(iter_seek_count, 1);
1570
1633
  PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, clock_);
1571
1634
  StopWatch sw(clock_, statistics_, DB_SEEK);
1572
1635
 
1636
+ if (scan_opts_.has_value()) {
1637
+ // Validate the seek target is as expected in the previously prepared range
1638
+ auto const& scan_ranges = scan_opts_.value().GetScanRanges();
1639
+ if (scan_index_ >= scan_ranges.size()) {
1640
+ status_ = Status::InvalidArgument(
1641
+ "Seek called after exhausting all of the scan ranges");
1642
+ valid_ = false;
1643
+ return;
1644
+ }
1645
+
1646
+ // Validate start key of next prepare range matches the seek target
1647
+ auto const& range = scan_ranges[scan_index_];
1648
+ auto const& start = range.range.start;
1649
+ assert(start.has_value());
1650
+ if (user_comparator_.CompareWithoutTimestamp(target, *start) != 0) {
1651
+ status_ = Status::InvalidArgument(
1652
+ "Seek target does not match the start of the next prepared range at "
1653
+ "index " +
1654
+ std::to_string(scan_index_));
1655
+ valid_ = false;
1656
+ return;
1657
+ }
1658
+
1659
+ // validate the upper bound is set to the same value of limit, if limit
1660
+ // exists
1661
+ auto const& limit = range.range.limit;
1662
+ if (limit.has_value()) {
1663
+ if (iterate_upper_bound_ == nullptr ||
1664
+ user_comparator_.CompareWithoutTimestamp(
1665
+ limit.value(), *iterate_upper_bound_) != 0) {
1666
+ status_ = Status::InvalidArgument(
1667
+ "Upper bound is not set to the same limit value of the next "
1668
+ "prepared range at index " +
1669
+ std::to_string(scan_index_));
1670
+ valid_ = false;
1671
+ return;
1672
+ }
1673
+ }
1674
+ scan_index_++;
1675
+ }
1676
+
1573
1677
  if (cfh_ != nullptr) {
1574
1678
  // TODO: What do we do if this returns an error?
1575
1679
  Slice lower_bound, upper_bound;
@@ -240,16 +240,8 @@ class DBIter final : public Iterator {
240
240
 
241
241
  bool PrepareValue() override;
242
242
 
243
- void Prepare(const std::vector<ScanOptions>& scan_opts) override {
244
- std::optional<std::vector<ScanOptions>> new_scan_opts;
245
- new_scan_opts.emplace(scan_opts);
246
- scan_opts_.swap(new_scan_opts);
247
- if (!scan_opts.empty()) {
248
- iter_.Prepare(&scan_opts_.value());
249
- } else {
250
- iter_.Prepare(nullptr);
251
- }
252
- }
243
+ void Prepare(const MultiScanArgs& scan_opts) override;
244
+ Status ValidateScanOptions(const MultiScanArgs& multiscan_opts) const;
253
245
 
254
246
  private:
255
247
  DBIter(Env* _env, const ReadOptions& read_options,
@@ -505,7 +497,8 @@ class DBIter final : public Iterator {
505
497
  const Slice* const timestamp_lb_;
506
498
  const size_t timestamp_size_;
507
499
  std::string saved_timestamp_;
508
- std::optional<std::vector<ScanOptions>> scan_opts_;
500
+ std::optional<MultiScanArgs> scan_opts_;
501
+ size_t scan_index_{0};
509
502
  ReadOnlyMemTable* const active_mem_;
510
503
  SequenceNumber memtable_seqno_lb_;
511
504
  uint32_t memtable_op_scan_flush_trigger_;