@nxtedition/rocksdb 13.5.13 → 15.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232) hide show
  1. package/binding.cc +55 -180
  2. package/binding.gyp +2 -2
  3. package/chained-batch.js +9 -16
  4. package/deps/rocksdb/rocksdb/BUCK +18 -1
  5. package/deps/rocksdb/rocksdb/CMakeLists.txt +10 -3
  6. package/deps/rocksdb/rocksdb/Makefile +20 -9
  7. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +90 -13
  8. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +88 -75
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.h +44 -36
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +184 -148
  11. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +5 -11
  12. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +116 -47
  13. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +3 -6
  15. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +1 -1
  16. package/deps/rocksdb/rocksdb/db/builder.cc +4 -2
  17. package/deps/rocksdb/rocksdb/db/c.cc +207 -0
  18. package/deps/rocksdb/rocksdb/db/c_test.c +72 -0
  19. package/deps/rocksdb/rocksdb/db/column_family.cc +3 -2
  20. package/deps/rocksdb/rocksdb/db/column_family.h +5 -0
  21. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -0
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +2 -0
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +51 -38
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +29 -12
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +5 -10
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +566 -366
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +131 -4
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +1 -0
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +4 -4
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +13 -14
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +12 -7
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -10
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +97 -76
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +11 -14
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +1 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +8 -0
  38. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +16 -3
  39. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +1 -0
  40. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +448 -1
  41. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +22 -20
  42. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +4 -1
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +5 -5
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +7 -3
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -1
  46. package/deps/rocksdb/rocksdb/db/db_iter.cc +104 -0
  47. package/deps/rocksdb/rocksdb/db/db_iter.h +4 -11
  48. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +331 -58
  49. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +129 -0
  50. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +64 -0
  51. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +40 -0
  52. package/deps/rocksdb/rocksdb/db/db_test2.cc +25 -15
  53. package/deps/rocksdb/rocksdb/db/db_test_util.cc +42 -24
  54. package/deps/rocksdb/rocksdb/db/db_test_util.h +29 -14
  55. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +69 -36
  56. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +0 -1
  57. package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
  58. package/deps/rocksdb/rocksdb/db/experimental.cc +5 -4
  59. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +8 -1
  60. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +275 -79
  61. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +23 -5
  62. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +591 -175
  63. package/deps/rocksdb/rocksdb/db/flush_job.cc +3 -4
  64. package/deps/rocksdb/rocksdb/db/log_reader.cc +5 -2
  65. package/deps/rocksdb/rocksdb/db/memtable.cc +84 -35
  66. package/deps/rocksdb/rocksdb/db/memtable.h +39 -34
  67. package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -0
  68. package/deps/rocksdb/rocksdb/db/merge_operator.cc +1 -1
  69. package/deps/rocksdb/rocksdb/db/multi_scan.cc +11 -5
  70. package/deps/rocksdb/rocksdb/db/version_edit.cc +1 -1
  71. package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
  72. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +34 -14
  73. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +28 -5
  74. package/deps/rocksdb/rocksdb/db/version_set.cc +159 -14
  75. package/deps/rocksdb/rocksdb/db/version_set.h +2 -0
  76. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -1
  77. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +60 -0
  78. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +16 -1
  79. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.h +75 -10
  80. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.cc +28 -0
  81. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.h +2 -0
  82. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -1
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +50 -2
  84. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +57 -0
  85. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +0 -4
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +266 -35
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +0 -6
  89. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +18 -2
  90. package/deps/rocksdb/rocksdb/env/env.cc +12 -0
  91. package/deps/rocksdb/rocksdb/env/env_test.cc +18 -0
  92. package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +2 -0
  93. package/deps/rocksdb/rocksdb/env/fs_posix.cc +9 -5
  94. package/deps/rocksdb/rocksdb/env/io_posix.cc +4 -2
  95. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +19 -0
  96. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +33 -31
  97. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +42 -9
  98. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +93 -0
  99. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +43 -49
  100. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +4 -3
  101. package/deps/rocksdb/rocksdb/include/rocksdb/compression_type.h +8 -6
  102. package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +487 -0
  103. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +11 -12
  104. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +135 -1
  105. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -0
  106. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +12 -0
  107. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -1
  108. package/deps/rocksdb/rocksdb/include/rocksdb/ldb_tool.h +8 -0
  109. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +12 -8
  110. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +3 -0
  111. package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +19 -9
  112. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +219 -24
  113. package/deps/rocksdb/rocksdb/include/rocksdb/point_lock_bench_tool.h +14 -0
  114. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +2 -2
  115. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +1 -1
  116. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +7 -0
  117. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +16 -0
  118. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +16 -4
  119. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +13 -0
  120. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +4 -0
  121. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +0 -2
  122. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +45 -0
  123. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +1 -1
  124. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +1 -1
  125. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +6 -1
  126. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
  127. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  128. package/deps/rocksdb/rocksdb/memory/memory_allocator_impl.h +3 -3
  129. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +77 -51
  130. package/deps/rocksdb/rocksdb/memtable/skiplist.h +10 -13
  131. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +16 -7
  132. package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +9 -4
  133. package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +2 -0
  134. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +6 -0
  135. package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -1
  136. package/deps/rocksdb/rocksdb/options/cf_options.h +6 -2
  137. package/deps/rocksdb/rocksdb/options/options.cc +2 -0
  138. package/deps/rocksdb/rocksdb/options/options_helper.cc +9 -8
  139. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -5
  140. package/deps/rocksdb/rocksdb/port/mmap.cc +1 -1
  141. package/deps/rocksdb/rocksdb/port/win/xpress_win.cc +51 -0
  142. package/deps/rocksdb/rocksdb/port/win/xpress_win.h +4 -0
  143. package/deps/rocksdb/rocksdb/src.mk +8 -2
  144. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1125 -765
  145. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +35 -24
  146. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +29 -4
  147. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +732 -256
  148. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +225 -16
  149. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +102 -26
  150. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +1 -1
  151. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +2 -75
  152. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +433 -141
  153. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +2 -0
  154. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +17 -10
  155. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy_impl.h +20 -0
  156. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +112 -85
  157. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +191 -36
  158. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +2 -2
  159. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
  160. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +108 -31
  161. package/deps/rocksdb/rocksdb/table/external_table.cc +7 -3
  162. package/deps/rocksdb/rocksdb/table/format.cc +6 -12
  163. package/deps/rocksdb/rocksdb/table/format.h +10 -0
  164. package/deps/rocksdb/rocksdb/table/internal_iterator.h +1 -1
  165. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +1 -1
  166. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -1
  167. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +5 -0
  168. package/deps/rocksdb/rocksdb/table/multiget_context.h +3 -1
  169. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +118 -46
  170. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +9 -8
  171. package/deps/rocksdb/rocksdb/table/table_builder.h +5 -0
  172. package/deps/rocksdb/rocksdb/table/table_properties.cc +16 -0
  173. package/deps/rocksdb/rocksdb/table/table_test.cc +1540 -155
  174. package/deps/rocksdb/rocksdb/test_util/testutil.h +21 -5
  175. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +26 -5
  176. package/deps/rocksdb/rocksdb/tools/ldb.cc +1 -2
  177. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +2 -0
  178. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -3
  179. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +133 -165
  180. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +173 -64
  181. package/deps/rocksdb/rocksdb/util/aligned_buffer.h +69 -0
  182. package/deps/rocksdb/rocksdb/util/atomic.h +6 -0
  183. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +29 -20
  184. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +10 -6
  185. package/deps/rocksdb/rocksdb/util/bit_fields.h +338 -0
  186. package/deps/rocksdb/rocksdb/util/coding.h +3 -3
  187. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -2
  188. package/deps/rocksdb/rocksdb/util/compression.cc +777 -82
  189. package/deps/rocksdb/rocksdb/util/compression.h +5 -0
  190. package/deps/rocksdb/rocksdb/util/compression_test.cc +5 -3
  191. package/deps/rocksdb/rocksdb/util/dynamic_bloom.cc +2 -2
  192. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +15 -14
  193. package/deps/rocksdb/rocksdb/util/interval_test.cc +102 -0
  194. package/deps/rocksdb/rocksdb/util/semaphore.h +164 -0
  195. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +10 -6
  196. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -2
  197. package/deps/rocksdb/rocksdb/util/slice_test.cc +136 -0
  198. package/deps/rocksdb/rocksdb/util/status.cc +1 -0
  199. package/deps/rocksdb/rocksdb/util/string_util.cc +2 -16
  200. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +1 -1
  201. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -1
  202. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +7 -4
  203. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +35 -14
  204. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc +2 -0
  205. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc +5 -2
  206. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/any_lock_manager_test.h +244 -0
  207. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench.cc +18 -0
  208. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench_tool.cc +159 -0
  209. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +1244 -161
  210. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +66 -12
  211. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_stress_test.cc +103 -0
  212. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +1275 -8
  213. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +40 -262
  214. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test_common.h +78 -0
  215. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_validation_test_runner.h +469 -0
  216. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -6
  217. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +4 -0
  218. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +9 -1
  219. package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +18 -9
  220. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +2 -0
  221. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc +2 -1
  222. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +72 -44
  223. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +92 -15
  224. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +6 -20
  225. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +143 -112
  226. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +23 -16
  227. package/index.js +18 -42
  228. package/package.json +1 -1
  229. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  230. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  231. package/util.h +38 -12
  232. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.cc +0 -17
@@ -82,6 +82,7 @@ using ROCKSDB_NAMESPACE::DbPath;
82
82
  using ROCKSDB_NAMESPACE::Env;
83
83
  using ROCKSDB_NAMESPACE::EnvOptions;
84
84
  using ROCKSDB_NAMESPACE::EventListener;
85
+ using ROCKSDB_NAMESPACE::ExportImportFilesMetaData;
85
86
  using ROCKSDB_NAMESPACE::ExternalFileIngestionInfo;
86
87
  using ROCKSDB_NAMESPACE::FileLock;
87
88
  using ROCKSDB_NAMESPACE::FilterPolicy;
@@ -89,6 +90,7 @@ using ROCKSDB_NAMESPACE::FlushJobInfo;
89
90
  using ROCKSDB_NAMESPACE::FlushOptions;
90
91
  using ROCKSDB_NAMESPACE::HistogramData;
91
92
  using ROCKSDB_NAMESPACE::HyperClockCacheOptions;
93
+ using ROCKSDB_NAMESPACE::ImportColumnFamilyOptions;
92
94
  using ROCKSDB_NAMESPACE::InfoLogLevel;
93
95
  using ROCKSDB_NAMESPACE::IngestExternalFileOptions;
94
96
  using ROCKSDB_NAMESPACE::Iterator;
@@ -245,6 +247,9 @@ struct rocksdb_write_buffer_manager_t {
245
247
  struct rocksdb_sst_file_manager_t {
246
248
  std::shared_ptr<SstFileManager> rep;
247
249
  };
250
+ struct rocksdb_livefile_t {
251
+ LiveFileMetaData rep;
252
+ };
248
253
  struct rocksdb_livefiles_t {
249
254
  std::vector<LiveFileMetaData> rep;
250
255
  };
@@ -255,6 +260,12 @@ struct rocksdb_column_family_handle_t {
255
260
  struct rocksdb_column_family_metadata_t {
256
261
  ColumnFamilyMetaData rep;
257
262
  };
263
+ struct rocksdb_export_import_files_metadata_t {
264
+ ExportImportFilesMetaData* rep;
265
+ };
266
+ struct rocksdb_import_column_family_options_t {
267
+ ImportColumnFamilyOptions rep;
268
+ };
258
269
  struct rocksdb_level_metadata_t {
259
270
  const LevelMetaData* rep;
260
271
  };
@@ -947,6 +958,22 @@ void rocksdb_checkpoint_create(rocksdb_checkpoint_t* checkpoint,
947
958
  std::string(checkpoint_dir), log_size_for_flush));
948
959
  }
949
960
 
961
+ rocksdb_export_import_files_metadata_t* rocksdb_checkpoint_export_column_family(
962
+ rocksdb_checkpoint_t* checkpoint,
963
+ rocksdb_column_family_handle_t* column_family, const char* export_dir,
964
+ char** errptr) {
965
+ ExportImportFilesMetaData* metadata = nullptr;
966
+ if (SaveError(errptr,
967
+ checkpoint->rep->ExportColumnFamily(
968
+ column_family->rep, std::string(export_dir), &metadata))) {
969
+ return nullptr;
970
+ }
971
+ rocksdb_export_import_files_metadata_t* result =
972
+ new rocksdb_export_import_files_metadata_t;
973
+ result->rep = metadata;
974
+ return result;
975
+ }
976
+
950
977
  void rocksdb_checkpoint_object_destroy(rocksdb_checkpoint_t* checkpoint) {
951
978
  delete checkpoint->rep;
952
979
  delete checkpoint;
@@ -1190,6 +1217,26 @@ rocksdb_column_family_handle_t** rocksdb_create_column_families(
1190
1217
  return c_handles;
1191
1218
  }
1192
1219
 
1220
+ rocksdb_column_family_handle_t* rocksdb_create_column_family_with_import(
1221
+ rocksdb_t* db, rocksdb_options_t* column_family_options,
1222
+ const char* column_family_name,
1223
+ rocksdb_import_column_family_options_t* import_options,
1224
+ rocksdb_export_import_files_metadata_t* export_import_files_metadata,
1225
+ char** errptr) {
1226
+ rocksdb_column_family_handle_t* handle = new rocksdb_column_family_handle_t;
1227
+ handle->rep = nullptr;
1228
+ if (SaveError(errptr,
1229
+ db->rep->CreateColumnFamilyWithImport(
1230
+ ColumnFamilyOptions(column_family_options->rep),
1231
+ std::string(column_family_name), import_options->rep,
1232
+ *(export_import_files_metadata->rep), &(handle->rep)))) {
1233
+ delete handle;
1234
+ return nullptr;
1235
+ }
1236
+ handle->immortal = false;
1237
+ return handle;
1238
+ }
1239
+
1193
1240
  void rocksdb_create_column_families_destroy(
1194
1241
  rocksdb_column_family_handle_t** list) {
1195
1242
  free(list);
@@ -3639,6 +3686,11 @@ void rocksdb_options_set_write_buffer_manager(
3639
3686
  opt->rep.write_buffer_manager = wbm->rep;
3640
3687
  }
3641
3688
 
3689
+ void rocksdb_options_set_sst_file_manager(rocksdb_options_t* opt,
3690
+ rocksdb_sst_file_manager_t* sfm) {
3691
+ opt->rep.sst_file_manager = sfm->rep;
3692
+ }
3693
+
3642
3694
  size_t rocksdb_options_get_write_buffer_size(rocksdb_options_t* opt) {
3643
3695
  return opt->rep.write_buffer_size;
3644
3696
  }
@@ -4746,6 +4798,15 @@ void rocksdb_options_add_compact_on_deletion_collector_factory_del_ratio(
4746
4798
  opt->rep.table_properties_collector_factories.emplace_back(compact_on_del);
4747
4799
  }
4748
4800
 
4801
+ void rocksdb_options_add_compact_on_deletion_collector_factory_min_file_size(
4802
+ rocksdb_options_t* opt, size_t window_size, size_t num_dels_trigger,
4803
+ double deletion_ratio, uint64_t min_file_size) {
4804
+ std::shared_ptr<ROCKSDB_NAMESPACE::TablePropertiesCollectorFactory>
4805
+ compact_on_del = NewCompactOnDeletionCollectorFactory(
4806
+ window_size, num_dels_trigger, deletion_ratio, min_file_size);
4807
+ opt->rep.table_properties_collector_factories.emplace_back(compact_on_del);
4808
+ }
4809
+
4749
4810
  void rocksdb_set_perf_level(int v) {
4750
4811
  PerfLevel level = static_cast<PerfLevel>(v);
4751
4812
  SetPerfLevel(level);
@@ -6195,6 +6256,10 @@ void rocksdb_options_set_min_level_to_compress(rocksdb_options_t* opt,
6195
6256
  }
6196
6257
  }
6197
6258
 
6259
+ rocksdb_livefiles_t* rocksdb_livefiles_create() {
6260
+ return new rocksdb_livefiles_t;
6261
+ }
6262
+
6198
6263
  int rocksdb_livefiles_count(const rocksdb_livefiles_t* lf) {
6199
6264
  return static_cast<int>(lf->rep.size());
6200
6265
  }
@@ -6208,6 +6273,16 @@ const char* rocksdb_livefiles_name(const rocksdb_livefiles_t* lf, int index) {
6208
6273
  return lf->rep[index].name.c_str();
6209
6274
  }
6210
6275
 
6276
+ const char* rocksdb_livefiles_directory(const rocksdb_livefiles_t* lf,
6277
+ int index) {
6278
+ if (lf->rep[index].directory.empty()) {
6279
+ // db_path is deprecated but still returned by some code paths
6280
+ return lf->rep[index].db_path.c_str();
6281
+ } else {
6282
+ return lf->rep[index].directory.c_str();
6283
+ }
6284
+ }
6285
+
6211
6286
  int rocksdb_livefiles_level(const rocksdb_livefiles_t* lf, int index) {
6212
6287
  return lf->rep[index].level;
6213
6288
  }
@@ -6228,6 +6303,16 @@ const char* rocksdb_livefiles_largestkey(const rocksdb_livefiles_t* lf,
6228
6303
  return lf->rep[index].largestkey.data();
6229
6304
  }
6230
6305
 
6306
+ uint64_t rocksdb_livefiles_smallest_seqno(const rocksdb_livefiles_t* lf,
6307
+ int index) {
6308
+ return lf->rep[index].smallest_seqno;
6309
+ }
6310
+
6311
+ uint64_t rocksdb_livefiles_largest_seqno(const rocksdb_livefiles_t* lf,
6312
+ int index) {
6313
+ return lf->rep[index].largest_seqno;
6314
+ }
6315
+
6231
6316
  uint64_t rocksdb_livefiles_entries(const rocksdb_livefiles_t* lf, int index) {
6232
6317
  return lf->rep[index].num_entries;
6233
6318
  }
@@ -6238,6 +6323,71 @@ uint64_t rocksdb_livefiles_deletions(const rocksdb_livefiles_t* lf, int index) {
6238
6323
 
6239
6324
  void rocksdb_livefiles_destroy(const rocksdb_livefiles_t* lf) { delete lf; }
6240
6325
 
6326
+ rocksdb_livefile_t* rocksdb_livefile_create() { return new rocksdb_livefile_t; }
6327
+
6328
+ void rocksdb_livefile_set_column_family_name(rocksdb_livefile_t* lf,
6329
+ const char* column_family_name) {
6330
+ lf->rep.column_family_name = std::string(column_family_name);
6331
+ }
6332
+
6333
+ void rocksdb_livefile_set_level(rocksdb_livefile_t* lf, int level) {
6334
+ lf->rep.level = level;
6335
+ }
6336
+
6337
+ void rocksdb_livefile_set_name(rocksdb_livefile_t* lf, const char* name) {
6338
+ lf->rep.name = std::string(name);
6339
+ }
6340
+
6341
+ void rocksdb_livefile_set_directory(rocksdb_livefile_t* lf,
6342
+ const char* directory) {
6343
+ lf->rep.directory = std::string(directory);
6344
+ lf->rep.db_path = std::string(directory); // deprecated but still needed
6345
+ }
6346
+
6347
+ void rocksdb_livefile_set_size(rocksdb_livefile_t* lf, size_t size) {
6348
+ lf->rep.size = size;
6349
+ }
6350
+
6351
+ void rocksdb_livefile_set_smallest_key(rocksdb_livefile_t* lf,
6352
+ const char* smallest_key,
6353
+ size_t smallest_key_len) {
6354
+ lf->rep.smallestkey = std::string(smallest_key, smallest_key_len);
6355
+ }
6356
+
6357
+ void rocksdb_livefile_set_largest_key(rocksdb_livefile_t* lf,
6358
+ const char* largest_key,
6359
+ size_t largest_key_len) {
6360
+ lf->rep.largestkey = std::string(largest_key, largest_key_len);
6361
+ }
6362
+
6363
+ void rocksdb_livefile_set_smallest_seqno(rocksdb_livefile_t* lf,
6364
+ uint64_t smallest_seqno) {
6365
+ lf->rep.smallest_seqno = smallest_seqno;
6366
+ }
6367
+
6368
+ void rocksdb_livefile_set_largest_seqno(rocksdb_livefile_t* lf,
6369
+ uint64_t largest_seqno) {
6370
+ lf->rep.largest_seqno = largest_seqno;
6371
+ }
6372
+
6373
+ void rocksdb_livefile_set_num_entries(rocksdb_livefile_t* lf,
6374
+ uint64_t num_entries) {
6375
+ lf->rep.num_entries = num_entries;
6376
+ }
6377
+
6378
+ void rocksdb_livefile_set_num_deletions(rocksdb_livefile_t* lf,
6379
+ uint64_t num_deletions) {
6380
+ lf->rep.num_deletions = num_deletions;
6381
+ }
6382
+
6383
+ void rocksdb_livefile_destroy(rocksdb_livefile_t* lf) { delete lf; }
6384
+
6385
+ void rocksdb_livefiles_add(rocksdb_livefiles_t* lf,
6386
+ rocksdb_livefile_t* livefile) {
6387
+ lf->rep.push_back(std::move(livefile->rep));
6388
+ delete livefile;
6389
+ }
6390
+
6241
6391
  void rocksdb_get_options_from_string(const rocksdb_options_t* base_options,
6242
6392
  const char* opts_str,
6243
6393
  rocksdb_options_t* new_options,
@@ -6388,6 +6538,58 @@ char* rocksdb_sst_file_metadata_get_largestkey(
6388
6538
  return CopyString(file_meta->rep->largestkey);
6389
6539
  }
6390
6540
 
6541
+ rocksdb_import_column_family_options_t*
6542
+ rocksdb_import_column_family_options_create() {
6543
+ return new rocksdb_import_column_family_options_t;
6544
+ }
6545
+
6546
+ void rocksdb_import_column_family_options_set_move_files(
6547
+ rocksdb_import_column_family_options_t* opt, unsigned char v) {
6548
+ opt->rep.move_files = v;
6549
+ }
6550
+
6551
+ void rocksdb_import_column_family_options_destroy(
6552
+ rocksdb_import_column_family_options_t* metadata) {
6553
+ delete metadata;
6554
+ }
6555
+
6556
+ rocksdb_export_import_files_metadata_t*
6557
+ rocksdb_export_import_files_metadata_create() {
6558
+ auto metadata = new rocksdb_export_import_files_metadata_t;
6559
+ metadata->rep = new ExportImportFilesMetaData;
6560
+ return metadata;
6561
+ }
6562
+
6563
+ char* rocksdb_export_import_files_metadata_get_db_comparator_name(
6564
+ rocksdb_export_import_files_metadata_t* metadata) {
6565
+ return strdup(metadata->rep->db_comparator_name.c_str());
6566
+ }
6567
+
6568
+ void rocksdb_export_import_files_metadata_set_db_comparator_name(
6569
+ rocksdb_export_import_files_metadata_t* metadata, const char* name) {
6570
+ metadata->rep->db_comparator_name = std::string(name);
6571
+ }
6572
+
6573
+ rocksdb_livefiles_t* rocksdb_export_import_files_metadata_get_files(
6574
+ rocksdb_export_import_files_metadata_t* export_import_metadata) {
6575
+ auto files = new rocksdb_livefiles_t;
6576
+ files->rep = std::vector(export_import_metadata->rep->files);
6577
+ return files;
6578
+ }
6579
+
6580
+ void rocksdb_export_import_files_metadata_set_files(
6581
+ rocksdb_export_import_files_metadata_t* metadata,
6582
+ rocksdb_livefiles_t* files) {
6583
+ metadata->rep->files = std::move(files->rep);
6584
+ delete files;
6585
+ }
6586
+
6587
+ void rocksdb_export_import_files_metadata_destroy(
6588
+ rocksdb_export_import_files_metadata_t* metadata) {
6589
+ delete metadata->rep;
6590
+ delete metadata;
6591
+ }
6592
+
6391
6593
  /* Transactions */
6392
6594
 
6393
6595
  rocksdb_transactiondb_options_t* rocksdb_transactiondb_options_create() {
@@ -6419,6 +6621,11 @@ void rocksdb_transactiondb_options_set_default_lock_timeout(
6419
6621
  opt->rep.default_lock_timeout = default_lock_timeout;
6420
6622
  }
6421
6623
 
6624
+ void rocksdb_transactiondb_options_set_use_per_key_point_lock_mgr(
6625
+ rocksdb_transactiondb_options_t* opt, int use_per_key_point_lock_mgr) {
6626
+ opt->rep.use_per_key_point_lock_mgr = use_per_key_point_lock_mgr;
6627
+ }
6628
+
6422
6629
  rocksdb_transaction_options_t* rocksdb_transaction_options_create() {
6423
6630
  return new rocksdb_transaction_options_t;
6424
6631
  }
@@ -1036,6 +1036,78 @@ int main(int argc, char** argv) {
1036
1036
  rocksdb_options_set_error_if_exists(options, 1);
1037
1037
  }
1038
1038
 
1039
+ StartPhase("checkpoint_export_column_family");
1040
+ {
1041
+ static char cf_export_path[200];
1042
+ static char db_import_path[200];
1043
+ snprintf(cf_export_path, sizeof(cf_export_path),
1044
+ "%s/rocksdb_c_test-%d-cf_export", GetTempDir(), ((int)geteuid()));
1045
+ snprintf(db_import_path, sizeof(db_import_path),
1046
+ "%s/rocksdb_c_test-%d-db_import", GetTempDir(), ((int)geteuid()));
1047
+
1048
+ rocksdb_options_t* db_options = rocksdb_options_create();
1049
+ rocksdb_column_family_handle_t* cf_export =
1050
+ rocksdb_create_column_family(db, db_options, "cf_export", &err);
1051
+ CheckNoError(err);
1052
+
1053
+ rocksdb_put_cf(db, woptions, cf_export, "k1", 2, "v1", 2, &err);
1054
+ CheckNoError(err);
1055
+ rocksdb_put_cf(db, woptions, cf_export, "k2", 2, "v2", 2, &err);
1056
+ CheckNoError(err);
1057
+
1058
+ rocksdb_checkpoint_t* checkpoint =
1059
+ rocksdb_checkpoint_object_create(db, &err);
1060
+ CheckNoError(err);
1061
+
1062
+ rocksdb_export_import_files_metadata_t* export_metadata =
1063
+ rocksdb_checkpoint_export_column_family(checkpoint, cf_export,
1064
+ cf_export_path, &err);
1065
+ CheckNoError(err);
1066
+ const char* comparator_name =
1067
+ rocksdb_export_import_files_metadata_get_db_comparator_name(
1068
+ export_metadata);
1069
+ CheckEqual("leveldb.BytewiseComparator", comparator_name, 26);
1070
+ rocksdb_free((void*)comparator_name);
1071
+ rocksdb_checkpoint_object_destroy(checkpoint);
1072
+ checkpoint = NULL;
1073
+ rocksdb_drop_column_family(db, cf_export, &err);
1074
+ CheckNoError(err);
1075
+ rocksdb_column_family_handle_destroy(cf_export);
1076
+ rocksdb_options_set_create_if_missing(db_options, 1);
1077
+ rocksdb_options_set_error_if_exists(db_options, 1);
1078
+ rocksdb_t* db_import = rocksdb_open(db_options, db_import_path, &err);
1079
+ CheckNoError(err);
1080
+ rocksdb_import_column_family_options_t* import_options =
1081
+ rocksdb_import_column_family_options_create();
1082
+ rocksdb_column_family_handle_t* cf_import =
1083
+ rocksdb_create_column_family_with_import(db_import, db_options,
1084
+ "cf_import", import_options,
1085
+ export_metadata, &err);
1086
+ CheckNoError(err);
1087
+ rocksdb_import_column_family_options_destroy(import_options);
1088
+ rocksdb_export_import_files_metadata_destroy(export_metadata);
1089
+ size_t val_len;
1090
+ char* val =
1091
+ rocksdb_get_cf(db_import, roptions, cf_import, "k1", 2, &val_len, &err);
1092
+ CheckNoError(err);
1093
+ CheckEqual("v1", val, val_len);
1094
+ free(val);
1095
+
1096
+ val =
1097
+ rocksdb_get_cf(db_import, roptions, cf_import, "k2", 2, &val_len, &err);
1098
+ CheckNoError(err);
1099
+ CheckEqual("v2", val, val_len);
1100
+ free(val);
1101
+
1102
+ rocksdb_column_family_handle_destroy(cf_import);
1103
+ cf_import = NULL;
1104
+ rocksdb_close(db_import);
1105
+ rocksdb_destroy_db(db_options, db_import_path, &err);
1106
+ CheckNoError(err);
1107
+ rocksdb_options_destroy(db_options);
1108
+ db_options = NULL;
1109
+ }
1110
+
1039
1111
  StartPhase("compactall");
1040
1112
  rocksdb_compact_range(db, NULL, 0, NULL, 0);
1041
1113
  CheckGet(db, roptions, "foo", "hello");
@@ -280,7 +280,8 @@ ColumnFamilyOptions SanitizeCfOptions(const ImmutableDBOptions& db_options,
280
280
  }
281
281
 
282
282
  if (result.compaction_style == kCompactionStyleUniversal &&
283
- db_options.allow_ingest_behind && result.num_levels < 3) {
283
+ (db_options.allow_ingest_behind || result.cf_allow_ingest_behind) &&
284
+ result.num_levels < 3) {
284
285
  result.num_levels = 3;
285
286
  }
286
287
 
@@ -1331,7 +1332,7 @@ Compaction* ColumnFamilyData::CompactRange(
1331
1332
  const InternalKey* begin, const InternalKey* end,
1332
1333
  InternalKey** compaction_end, bool* conflict,
1333
1334
  uint64_t max_file_num_to_ignore, const std::string& trim_ts) {
1334
- auto* result = compaction_picker_->CompactRange(
1335
+ auto* result = compaction_picker_->PickCompactionForCompactRange(
1335
1336
  GetName(), mutable_cf_options, mutable_db_options,
1336
1337
  current_->storage_info(), input_level, output_level,
1337
1338
  compact_range_options, begin, end, compaction_end, conflict,
@@ -600,6 +600,11 @@ class ColumnFamilyData {
600
600
  return (mem_->IsEmpty() ? 0 : 1) + imm_.NumNotFlushed();
601
601
  }
602
602
 
603
+ // thread-safe, DB mutex not needed.
604
+ bool AllowIngestBehind() const {
605
+ return ioptions_.cf_allow_ingest_behind || ioptions_.allow_ingest_behind;
606
+ }
607
+
603
608
  private:
604
609
  friend class ColumnFamilySet;
605
610
  ColumnFamilyData(
@@ -441,6 +441,10 @@ TEST_F(CompactFilesTest, SentinelCompressionType) {
441
441
  }
442
442
 
443
443
  TEST_F(CompactFilesTest, CompressionWithBlockAlign) {
444
+ if (!Snappy_Supported()) {
445
+ ROCKSDB_GTEST_SKIP("Test requires Snappy support");
446
+ return;
447
+ }
444
448
  Options options;
445
449
  options.compression = CompressionType::kNoCompression;
446
450
  options.create_if_missing = true;
@@ -647,6 +647,8 @@ bool Compaction::KeyNotExistsBeyondOutputLevel(
647
647
  return true;
648
648
  } else if (output_level_ != 0 &&
649
649
  cfd_->ioptions().compaction_style == kCompactionStyleLevel) {
650
+ // TODO: apply the optimization here to other compaction styles and
651
+ // compaction/flush to L0.
650
652
  // Maybe use binary search to find right entry instead of linear search?
651
653
  const Comparator* user_cmp = cfd_->user_comparator();
652
654
  for (int lvl = output_level_ + 1; lvl < number_levels_; lvl++) {
@@ -28,7 +28,7 @@ CompactionIterator::CompactionIterator(
28
28
  SequenceNumber earliest_snapshot,
29
29
  SequenceNumber earliest_write_conflict_snapshot,
30
30
  SequenceNumber job_snapshot, const SnapshotChecker* snapshot_checker,
31
- Env* env, bool report_detailed_time, bool expect_valid_internal_key,
31
+ Env* env, bool report_detailed_time,
32
32
  CompactionRangeDelAggregator* range_del_agg,
33
33
  BlobFileBuilder* blob_file_builder, bool allow_data_in_errors,
34
34
  bool enforce_single_del_contracts,
@@ -42,8 +42,8 @@ CompactionIterator::CompactionIterator(
42
42
  : CompactionIterator(
43
43
  input, cmp, merge_helper, last_sequence, snapshots, earliest_snapshot,
44
44
  earliest_write_conflict_snapshot, job_snapshot, snapshot_checker, env,
45
- report_detailed_time, expect_valid_internal_key, range_del_agg,
46
- blob_file_builder, allow_data_in_errors, enforce_single_del_contracts,
45
+ report_detailed_time, range_del_agg, blob_file_builder,
46
+ allow_data_in_errors, enforce_single_del_contracts,
47
47
  manual_compaction_canceled,
48
48
  compaction ? std::make_unique<RealCompaction>(compaction) : nullptr,
49
49
  must_count_input_entries, compaction_filter, shutting_down, info_log,
@@ -55,7 +55,7 @@ CompactionIterator::CompactionIterator(
55
55
  SequenceNumber earliest_snapshot,
56
56
  SequenceNumber earliest_write_conflict_snapshot,
57
57
  SequenceNumber job_snapshot, const SnapshotChecker* snapshot_checker,
58
- Env* env, bool report_detailed_time, bool expect_valid_internal_key,
58
+ Env* env, bool report_detailed_time,
59
59
  CompactionRangeDelAggregator* range_del_agg,
60
60
  BlobFileBuilder* blob_file_builder, bool allow_data_in_errors,
61
61
  bool enforce_single_del_contracts,
@@ -76,16 +76,14 @@ CompactionIterator::CompactionIterator(
76
76
  env_(env),
77
77
  clock_(env_->GetSystemClock().get()),
78
78
  report_detailed_time_(report_detailed_time),
79
- expect_valid_internal_key_(expect_valid_internal_key),
80
79
  range_del_agg_(range_del_agg),
81
80
  blob_file_builder_(blob_file_builder),
82
81
  compaction_(std::move(compaction)),
83
82
  compaction_filter_(compaction_filter),
84
83
  shutting_down_(shutting_down),
85
84
  manual_compaction_canceled_(manual_compaction_canceled),
86
- bottommost_level_(!compaction_ ? false
87
- : compaction_->bottommost_level() &&
88
- !compaction_->allow_ingest_behind()),
85
+ bottommost_level_(compaction_ && compaction_->bottommost_level() &&
86
+ !compaction_->allow_ingest_behind()),
89
87
  // snapshots_ cannot be nullptr, but we will assert later in the body of
90
88
  // the constructor.
91
89
  visible_at_tip_(snapshots_ ? snapshots_->empty() : false),
@@ -161,6 +159,7 @@ void CompactionIterator::Next() {
161
159
  // MergeUntil stops when it encounters a corrupt key and does not
162
160
  // include them in the result, so we expect the keys here to be valid.
163
161
  if (!s.ok()) {
162
+ // FIXME: should fail compaction after this fatal logging.
164
163
  ROCKS_LOG_FATAL(
165
164
  info_log_, "Invalid ikey %s in compaction. %s",
166
165
  allow_data_in_errors_ ? key_.ToString(true).c_str() : "hidden",
@@ -464,18 +463,9 @@ void CompactionIterator::NextFromInput() {
464
463
  if (!pik_status.ok()) {
465
464
  iter_stats_.num_input_corrupt_records++;
466
465
 
467
- // If `expect_valid_internal_key_` is false, return the corrupted key
468
- // and let the caller decide what to do with it.
469
- if (expect_valid_internal_key_) {
470
- status_ = pik_status;
471
- return;
472
- }
473
- key_ = current_key_.SetInternalKey(key_);
474
- has_current_user_key_ = false;
475
- current_user_key_sequence_ = kMaxSequenceNumber;
476
- current_user_key_snapshot_ = 0;
477
- validity_info_.SetValid(ValidContext::kParseKeyError);
478
- break;
466
+ // Always fail compaction when encountering corrupted internal keys
467
+ status_ = pik_status;
468
+ return;
479
469
  }
480
470
  TEST_SYNC_POINT_CALLBACK("CompactionIterator:ProcessKV", &ikey_);
481
471
  if (is_range_del_) {
@@ -642,7 +632,8 @@ void CompactionIterator::NextFromInput() {
642
632
  } else if (ikey_.type == kTypeSingleDeletion) {
643
633
  // We can compact out a SingleDelete if:
644
634
  // 1) We encounter the corresponding PUT -OR- we know that this key
645
- // doesn't appear past this output level
635
+ // doesn't appear past this output level and we are not in
636
+ // ingest_behind mode.
646
637
  // =AND=
647
638
  // 2) We've already returned a record in this snapshot -OR-
648
639
  // there are no earlier earliest_write_conflict_snapshot.
@@ -731,6 +722,8 @@ void CompactionIterator::NextFromInput() {
731
722
  "CompactionIterator::NextFromInput:SingleDelete:1",
732
723
  const_cast<Compaction*>(c));
733
724
  if (last_key_seq_zeroed_) {
725
+ // Drop SD and the next key since they are both in the last
726
+ // snapshot (since last key has seqno zeroed).
734
727
  ++iter_stats_.num_record_drop_hidden;
735
728
  ++iter_stats_.num_record_drop_obsolete;
736
729
  assert(bottommost_level_);
@@ -841,7 +834,7 @@ void CompactionIterator::NextFromInput() {
841
834
  // iteration. If the next key is corrupt, we return before the
842
835
  // comparison, so the value of has_current_user_key does not matter.
843
836
  has_current_user_key_ = false;
844
- if (compaction_ != nullptr &&
837
+ if (compaction_ != nullptr && !compaction_->allow_ingest_behind() &&
845
838
  DefinitelyInSnapshot(ikey_.sequence, earliest_snapshot_) &&
846
839
  compaction_->KeyNotExistsBeyondOutputLevel(ikey_.user_key,
847
840
  &level_ptrs_) &&
@@ -854,6 +847,9 @@ void CompactionIterator::NextFromInput() {
854
847
  ++iter_stats_.num_optimized_del_drop_obsolete;
855
848
  }
856
849
  } else if (last_key_seq_zeroed_) {
850
+ // Sequence number zeroing requires bottommost_level_, which is
851
+ // false with ingest_behind.
852
+ assert(!compaction_->allow_ingest_behind());
857
853
  // Skip.
858
854
  ++iter_stats_.num_record_drop_hidden;
859
855
  ++iter_stats_.num_record_drop_obsolete;
@@ -870,6 +866,7 @@ void CompactionIterator::NextFromInput() {
870
866
  } else if (last_sequence != kMaxSequenceNumber &&
871
867
  (last_snapshot == current_user_key_snapshot_ ||
872
868
  last_snapshot < current_user_key_snapshot_)) {
869
+ // rule (A):
873
870
  // If the earliest snapshot is which this key is visible in
874
871
  // is the same as the visibility of a previous instance of the
875
872
  // same key, then this kv is not visible in any snapshot.
@@ -878,6 +875,15 @@ void CompactionIterator::NextFromInput() {
878
875
  // Note: Dropping this key will not affect TransactionDB write-conflict
879
876
  // checking since there has already been a record returned for this key
880
877
  // in this snapshot.
878
+ // When ingest_behind is enabled, it's ok that we drop an overwritten
879
+ // Delete here. The overwriting key still covers whatever will be
880
+ // ingested. Note that we will not drop SingleDelete here as SingleDelete
881
+ // is handled entirely in its own if clause. This is important, see
882
+ // example: from new to old: SingleDelete_1, PUT_1, SingleDelete_2, PUT_2,
883
+ // where all operations are on the same key and PUT_2 is ingested with
884
+ // ingest_behind=true. If SingleDelete_2 is dropped due to being compacted
885
+ // together with PUT_1, and then PUT_1 is compacted away together with
886
+ // SingleDelete_1, PUT_2 can incorrectly become visible.
881
887
  if (last_sequence < current_user_key_sequence_) {
882
888
  ROCKS_LOG_FATAL(info_log_,
883
889
  "key %s, last_sequence (%" PRIu64
@@ -887,12 +893,13 @@ void CompactionIterator::NextFromInput() {
887
893
  assert(false);
888
894
  }
889
895
 
890
- ++iter_stats_.num_record_drop_hidden; // rule (A)
896
+ ++iter_stats_.num_record_drop_hidden;
891
897
  AdvanceInputIter();
892
898
  } else if (compaction_ != nullptr &&
893
899
  (ikey_.type == kTypeDeletion ||
894
900
  (ikey_.type == kTypeDeletionWithTimestamp &&
895
901
  cmp_with_history_ts_low_ < 0)) &&
902
+ !compaction_->allow_ingest_behind() &&
896
903
  DefinitelyInSnapshot(ikey_.sequence, earliest_snapshot_) &&
897
904
  compaction_->KeyNotExistsBeyondOutputLevel(ikey_.user_key,
898
905
  &level_ptrs_)) {
@@ -928,11 +935,13 @@ void CompactionIterator::NextFromInput() {
928
935
  (ikey_.type == kTypeDeletionWithTimestamp &&
929
936
  cmp_with_history_ts_low_ < 0)) &&
930
937
  bottommost_level_) {
938
+ assert(compaction_);
939
+ assert(!compaction_->allow_ingest_behind()); // bottommost_level_ is true
931
940
  // Handle the case where we have a delete key at the bottom most level
932
941
  // We can skip outputting the key iff there are no subsequent puts for
933
942
  // this key
934
- assert(!compaction_ || compaction_->KeyNotExistsBeyondOutputLevel(
935
- ikey_.user_key, &level_ptrs_));
943
+ assert(compaction_->KeyNotExistsBeyondOutputLevel(ikey_.user_key,
944
+ &level_ptrs_));
936
945
  ParsedInternalKey next_ikey;
937
946
  AdvanceInputIter();
938
947
  #ifndef NDEBUG
@@ -974,6 +983,12 @@ void CompactionIterator::NextFromInput() {
974
983
  (compaction_ != nullptr &&
975
984
  compaction_->KeyNotExistsBeyondOutputLevel(ikey_.user_key,
976
985
  &level_ptrs_)))) {
986
+ // FIXME: it's possible that we are setting sequence number to 0 as
987
+ // preferred sequence number here. If cf_ingest_behind is enabled, this
988
+ // may fail ingestions since they expect all keys above the last level
989
+ // to have non-zero sequence number. We should probably not allow seqno
990
+ // zeroing here.
991
+ //
977
992
  // This section that attempts to swap preferred sequence number will not
978
993
  // be invoked if this is a CompactionIterator created for flush, since
979
994
  // `compaction_` will be nullptr and it's not bottommost either.
@@ -1105,17 +1120,15 @@ void CompactionIterator::NextFromInput() {
1105
1120
  }
1106
1121
  }
1107
1122
 
1108
- if (!Valid() && IsShuttingDown()) {
1109
- status_ = Status::ShutdownInProgress();
1110
- }
1111
-
1112
- if (IsPausingManualCompaction()) {
1113
- status_ = Status::Incomplete(Status::SubCode::kManualCompactionPaused);
1114
- }
1115
-
1116
- // Propagate corruption status from memtable itereator
1117
- if (!input_.Valid() && input_.status().IsCorruption()) {
1118
- status_ = input_.status();
1123
+ if (status_.ok()) {
1124
+ if (!Valid() && IsShuttingDown()) {
1125
+ status_ = Status::ShutdownInProgress();
1126
+ } else if (IsPausingManualCompaction()) {
1127
+ status_ = Status::Incomplete(Status::SubCode::kManualCompactionPaused);
1128
+ } else if (!input_.Valid() && input_.status().IsCorruption()) {
1129
+ // Propagate corruption status from memtable iterator
1130
+ status_ = input_.status();
1131
+ }
1119
1132
  }
1120
1133
  }
1121
1134
 
@@ -1274,11 +1287,11 @@ void CompactionIterator::PrepareOutput() {
1274
1287
  //
1275
1288
  // Can we do the same for levels above bottom level as long as
1276
1289
  // KeyNotExistsBeyondOutputLevel() return true?
1277
- if (Valid() && compaction_ != nullptr &&
1278
- !compaction_->allow_ingest_behind() && bottommost_level_ &&
1290
+ if (Valid() && bottommost_level_ &&
1279
1291
  DefinitelyInSnapshot(ikey_.sequence, earliest_snapshot_) &&
1280
1292
  ikey_.type != kTypeMerge && current_key_committed_ &&
1281
1293
  ikey_.sequence <= preserve_seqno_after_ && !is_range_del_) {
1294
+ assert(compaction_ != nullptr && !compaction_->allow_ingest_behind());
1282
1295
  if (ikey_.type == kTypeDeletion ||
1283
1296
  (ikey_.type == kTypeSingleDeletion && timestamp_size_ == 0)) {
1284
1297
  ROCKS_LOG_FATAL(