@nxtedition/rocksdb 13.1.4 → 13.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. package/binding.cc +43 -16
  2. package/deps/rocksdb/rocksdb/{TARGETS → BUCK} +27 -0
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +3 -1
  4. package/deps/rocksdb/rocksdb/Makefile +2 -2
  5. package/deps/rocksdb/rocksdb/cache/cache.cc +3 -1
  6. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +2 -0
  7. package/deps/rocksdb/rocksdb/db/attribute_group_iterator_impl.h +34 -9
  8. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +7 -6
  9. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +5 -1
  10. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +22 -14
  11. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +149 -0
  12. package/deps/rocksdb/rocksdb/db/builder.cc +13 -24
  13. package/deps/rocksdb/rocksdb/db/coalescing_iterator.h +35 -10
  14. package/deps/rocksdb/rocksdb/db/column_family.cc +21 -10
  15. package/deps/rocksdb/rocksdb/db/column_family.h +15 -8
  16. package/deps/rocksdb/rocksdb/db/column_family_test.cc +98 -7
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +126 -16
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +51 -5
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +2 -2
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -8
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +24 -0
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +52 -22
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +9 -7
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +36 -9
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +6 -0
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +30 -17
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +26 -23
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +43 -33
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +6 -5
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +19 -9
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +6 -5
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +632 -411
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +171 -51
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +7 -5
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +37 -10
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +51 -11
  37. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +10 -3
  38. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +350 -154
  39. package/deps/rocksdb/rocksdb/db/convenience.cc +1 -1
  40. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +62 -27
  41. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +68 -1
  42. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +91 -0
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +134 -70
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +71 -23
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +43 -16
  46. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +47 -33
  47. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +27 -19
  48. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +38 -25
  49. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -3
  50. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +7 -4
  51. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +258 -42
  52. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +161 -9
  53. package/deps/rocksdb/rocksdb/db/db_iter.cc +118 -86
  54. package/deps/rocksdb/rocksdb/db/db_iter.h +44 -17
  55. package/deps/rocksdb/rocksdb/db/db_options_test.cc +27 -6
  56. package/deps/rocksdb/rocksdb/db/db_test.cc +48 -16
  57. package/deps/rocksdb/rocksdb/db/db_test2.cc +60 -15
  58. package/deps/rocksdb/rocksdb/db/db_test_util.cc +97 -44
  59. package/deps/rocksdb/rocksdb/db/db_test_util.h +7 -1
  60. package/deps/rocksdb/rocksdb/db/dbformat.cc +15 -5
  61. package/deps/rocksdb/rocksdb/db/dbformat.h +137 -55
  62. package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
  63. package/deps/rocksdb/rocksdb/db/experimental.cc +54 -0
  64. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +663 -8
  65. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +152 -91
  66. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +134 -11
  67. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +55 -9
  68. package/deps/rocksdb/rocksdb/db/flush_job.cc +52 -29
  69. package/deps/rocksdb/rocksdb/db/flush_job.h +5 -3
  70. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +18 -12
  71. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +23 -29
  72. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +3 -2
  73. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +2 -0
  74. package/deps/rocksdb/rocksdb/db/internal_stats.cc +9 -6
  75. package/deps/rocksdb/rocksdb/db/internal_stats.h +54 -0
  76. package/deps/rocksdb/rocksdb/db/job_context.h +1 -1
  77. package/deps/rocksdb/rocksdb/db/log_reader.cc +6 -7
  78. package/deps/rocksdb/rocksdb/db/manifest_ops.cc +47 -0
  79. package/deps/rocksdb/rocksdb/db/manifest_ops.h +20 -0
  80. package/deps/rocksdb/rocksdb/db/memtable.cc +165 -64
  81. package/deps/rocksdb/rocksdb/db/memtable.h +422 -243
  82. package/deps/rocksdb/rocksdb/db/memtable_list.cc +99 -68
  83. package/deps/rocksdb/rocksdb/db/memtable_list.h +63 -38
  84. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +28 -25
  85. package/deps/rocksdb/rocksdb/db/multi_cf_iterator_impl.h +118 -60
  86. package/deps/rocksdb/rocksdb/db/multi_cf_iterator_test.cc +344 -89
  87. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +2 -3
  88. package/deps/rocksdb/rocksdb/db/repair.cc +15 -14
  89. package/deps/rocksdb/rocksdb/db/repair_test.cc +0 -13
  90. package/deps/rocksdb/rocksdb/db/snapshot_checker.h +7 -0
  91. package/deps/rocksdb/rocksdb/db/table_cache.cc +62 -65
  92. package/deps/rocksdb/rocksdb/db/table_cache.h +70 -76
  93. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +5 -6
  94. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
  95. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +8 -7
  96. package/deps/rocksdb/rocksdb/db/version_builder.cc +17 -19
  97. package/deps/rocksdb/rocksdb/db/version_builder.h +13 -12
  98. package/deps/rocksdb/rocksdb/db/version_edit.h +30 -0
  99. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +3 -5
  100. package/deps/rocksdb/rocksdb/db/version_set.cc +89 -129
  101. package/deps/rocksdb/rocksdb/db/version_set.h +12 -4
  102. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -2
  103. package/deps/rocksdb/rocksdb/db/version_set_test.cc +12 -8
  104. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +0 -15
  105. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +0 -2
  106. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +9 -7
  107. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +0 -8
  108. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.h +28 -2
  109. package/deps/rocksdb/rocksdb/db/write_batch.cc +32 -10
  110. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +9 -0
  111. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +2 -1
  112. package/deps/rocksdb/rocksdb/db/write_thread.cc +3 -1
  113. package/deps/rocksdb/rocksdb/db/write_thread.h +6 -2
  114. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +15 -0
  115. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +7 -0
  116. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  117. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +18 -2
  118. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +100 -22
  119. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -4
  120. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +34 -8
  121. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +223 -78
  122. package/deps/rocksdb/rocksdb/env/file_system.cc +6 -1
  123. package/deps/rocksdb/rocksdb/env/fs_posix.cc +53 -0
  124. package/deps/rocksdb/rocksdb/env/io_posix.cc +63 -17
  125. package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
  126. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +132 -48
  127. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +92 -24
  128. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +727 -109
  129. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +3 -4
  130. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
  131. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +8 -0
  132. package/deps/rocksdb/rocksdb/include/rocksdb/attribute_groups.h +20 -1
  133. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +9 -0
  134. package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +9 -5
  135. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +2 -0
  136. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +10 -2
  137. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -0
  138. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +7 -0
  139. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +34 -37
  140. package/deps/rocksdb/rocksdb/include/rocksdb/iterator_base.h +21 -0
  141. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +56 -28
  142. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -0
  143. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +36 -28
  144. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +11 -0
  145. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
  146. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +84 -60
  147. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/secondary_index.h +102 -0
  148. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +89 -2
  149. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +32 -0
  150. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +30 -1
  151. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +23 -2
  152. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  153. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -0
  154. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +79 -21
  155. package/deps/rocksdb/rocksdb/memtable/skiplist.h +41 -18
  156. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +1 -5
  157. package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.cc +169 -0
  158. package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.h +400 -0
  159. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -0
  160. package/deps/rocksdb/rocksdb/options/cf_options.cc +137 -82
  161. package/deps/rocksdb/rocksdb/options/cf_options.h +18 -6
  162. package/deps/rocksdb/rocksdb/options/configurable.cc +31 -17
  163. package/deps/rocksdb/rocksdb/options/configurable_helper.h +7 -6
  164. package/deps/rocksdb/rocksdb/options/options_helper.cc +10 -8
  165. package/deps/rocksdb/rocksdb/options/options_parser.cc +74 -54
  166. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +89 -0
  167. package/deps/rocksdb/rocksdb/options/options_test.cc +112 -26
  168. package/deps/rocksdb/rocksdb/port/port.h +5 -9
  169. package/deps/rocksdb/rocksdb/src.mk +8 -0
  170. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +4 -0
  171. package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -7
  172. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -0
  173. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +62 -80
  174. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +13 -3
  175. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +16 -5
  176. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +38 -7
  177. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +12 -4
  178. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -1
  179. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +4 -1
  180. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +204 -1
  181. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -3
  182. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
  183. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +4 -0
  184. package/deps/rocksdb/rocksdb/table/format.cc +3 -3
  185. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +4 -1
  186. package/deps/rocksdb/rocksdb/table/mock_table.cc +0 -50
  187. package/deps/rocksdb/rocksdb/table/mock_table.h +53 -0
  188. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +4 -0
  189. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
  190. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +10 -5
  191. package/deps/rocksdb/rocksdb/table/table_builder.h +3 -1
  192. package/deps/rocksdb/rocksdb/table/table_properties.cc +181 -0
  193. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +5 -5
  194. package/deps/rocksdb/rocksdb/table/table_test.cc +71 -64
  195. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +45 -45
  196. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +35 -35
  197. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +43 -43
  198. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +41 -4
  199. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +1 -0
  200. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +1 -1
  201. package/deps/rocksdb/rocksdb/unreleased_history/add.sh +13 -0
  202. package/deps/rocksdb/rocksdb/util/aligned_buffer.h +24 -5
  203. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +7 -0
  204. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +0 -52
  205. package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +1 -10
  206. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +92 -0
  207. package/deps/rocksdb/rocksdb/util/thread_operation.h +1 -0
  208. package/deps/rocksdb/rocksdb/util/udt_util.cc +50 -4
  209. package/deps/rocksdb/rocksdb/util/udt_util.h +24 -11
  210. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +26 -13
  211. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +1 -16
  212. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +2 -0
  213. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.cc +214 -0
  214. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.h +60 -0
  215. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index_test.cc +124 -0
  216. package/deps/rocksdb/rocksdb/utilities/secondary_index/secondary_index_mixin.h +441 -0
  217. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +34 -3
  218. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +7 -2
  219. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +437 -0
  220. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +34 -11
  221. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +14 -7
  222. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +7 -1
  223. package/deps/rocksdb/rocksdb/utilities/transactions/snapshot_checker.cc +17 -0
  224. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +69 -0
  225. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +20 -0
  226. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +1290 -0
  227. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +324 -0
  228. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -1
  229. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +8 -1
  230. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +57 -12
  231. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +32 -3
  232. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +33 -2
  233. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +721 -9
  234. package/deps/rocksdb/rocksdb.gyp +2 -0
  235. package/package.json +1 -1
  236. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  237. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
@@ -35,7 +35,9 @@ class UniversalCompactionBuilder {
35
35
  UniversalCompactionBuilder(
36
36
  const ImmutableOptions& ioptions, const InternalKeyComparator* icmp,
37
37
  const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
38
- const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
38
+ const MutableDBOptions& mutable_db_options,
39
+ const std::vector<SequenceNumber>& existing_snapshots,
40
+ const SnapshotChecker* snapshot_checker, VersionStorageInfo* vstorage,
39
41
  UniversalCompactionPicker* picker, LogBuffer* log_buffer)
40
42
  : ioptions_(ioptions),
41
43
  icmp_(icmp),
@@ -44,7 +46,19 @@ class UniversalCompactionBuilder {
44
46
  mutable_db_options_(mutable_db_options),
45
47
  vstorage_(vstorage),
46
48
  picker_(picker),
47
- log_buffer_(log_buffer) {}
49
+ log_buffer_(log_buffer) {
50
+ assert(icmp_);
51
+ const auto* ucmp = icmp_->user_comparator();
52
+ assert(ucmp);
53
+ // These parameters are only passed when user-defined timestamp is not
54
+ // enabled.
55
+ if (ucmp->timestamp_size() == 0) {
56
+ earliest_snapshot_ = existing_snapshots.empty()
57
+ ? kMaxSequenceNumber
58
+ : existing_snapshots.at(0);
59
+ snapshot_checker_ = snapshot_checker;
60
+ }
61
+ }
48
62
 
49
63
  // Form and return the compaction object. The caller owns return object.
50
64
  Compaction* PickCompaction();
@@ -52,12 +66,15 @@ class UniversalCompactionBuilder {
52
66
  private:
53
67
  struct SortedRun {
54
68
  SortedRun(int _level, FileMetaData* _file, uint64_t _size,
55
- uint64_t _compensated_file_size, bool _being_compacted)
69
+ uint64_t _compensated_file_size, bool _being_compacted,
70
+ bool _level_has_marked_standalone_rangedel)
56
71
  : level(_level),
57
72
  file(_file),
58
73
  size(_size),
59
74
  compensated_file_size(_compensated_file_size),
60
- being_compacted(_being_compacted) {
75
+ being_compacted(_being_compacted),
76
+ level_has_marked_standalone_rangedel(
77
+ _level_has_marked_standalone_rangedel) {
61
78
  assert(compensated_file_size > 0);
62
79
  assert(level != 0 || file != nullptr);
63
80
  }
@@ -79,6 +96,10 @@ class UniversalCompactionBuilder {
79
96
  uint64_t size;
80
97
  uint64_t compensated_file_size;
81
98
  bool being_compacted;
99
+ // True if this level has any file that is a standalone range deletion file
100
+ // marked for compaction. Best effort is made to make only deletion
101
+ // triggered compaction pick this type of file.
102
+ bool level_has_marked_standalone_rangedel;
82
103
  };
83
104
 
84
105
  // Pick Universal compaction to limit read amplification
@@ -98,6 +119,11 @@ class UniversalCompactionBuilder {
98
119
 
99
120
  Compaction* PickDeleteTriggeredCompaction();
100
121
 
122
+ // Returns true if this given file (that is marked for compaction) should be
123
+ // skipped from being picked for now. We do this to best use standalone range
124
+ // tombstone files.
125
+ bool ShouldSkipMarkedFile(const FileMetaData* file) const;
126
+
101
127
  // Form a compaction from the sorted run indicated by start_index to the
102
128
  // oldest sorted run.
103
129
  // The caller is responsible for making sure that those files are not in
@@ -116,7 +142,7 @@ class UniversalCompactionBuilder {
116
142
 
117
143
  bool ShouldSkipLastSortedRunForSizeAmpCompaction() const {
118
144
  assert(!sorted_runs_.empty());
119
- return ioptions_.preclude_last_level_data_seconds > 0 &&
145
+ return mutable_cf_options_.preclude_last_level_data_seconds > 0 &&
120
146
  ioptions_.num_levels > 2 &&
121
147
  sorted_runs_.back().level == ioptions_.num_levels - 1 &&
122
148
  sorted_runs_.size() > 1;
@@ -234,8 +260,18 @@ class UniversalCompactionBuilder {
234
260
  VersionStorageInfo* vstorage_;
235
261
  UniversalCompactionPicker* picker_;
236
262
  LogBuffer* log_buffer_;
237
-
238
- static std::vector<UniversalCompactionBuilder::SortedRun> CalculateSortedRuns(
263
+ // Optional earliest snapshot at time of compaction picking. This is only
264
+ // provided if the column family doesn't enable user-defined timestamps.
265
+ // And this information is only passed to `Compaction` picked by deletion
266
+ // triggered compaction for possible optimizations.
267
+ std::optional<SequenceNumber> earliest_snapshot_;
268
+ const SnapshotChecker* snapshot_checker_;
269
+ // Mapping from file id to its index in the sorted run for the files that are
270
+ // marked for compaction. This is only populated when snapshot info is
271
+ // populated.
272
+ std::map<uint64_t, size_t> file_marked_for_compaction_to_sorted_run_index_;
273
+
274
+ std::vector<UniversalCompactionBuilder::SortedRun> CalculateSortedRuns(
239
275
  const VersionStorageInfo& vstorage, int last_level,
240
276
  uint64_t* max_run_size);
241
277
 
@@ -394,11 +430,13 @@ bool UniversalCompactionPicker::NeedsCompaction(
394
430
 
395
431
  Compaction* UniversalCompactionPicker::PickCompaction(
396
432
  const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
397
- const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
433
+ const MutableDBOptions& mutable_db_options,
434
+ const std::vector<SequenceNumber>& existing_snapshots,
435
+ const SnapshotChecker* snapshot_checker, VersionStorageInfo* vstorage,
398
436
  LogBuffer* log_buffer) {
399
- UniversalCompactionBuilder builder(ioptions_, icmp_, cf_name,
400
- mutable_cf_options, mutable_db_options,
401
- vstorage, this, log_buffer);
437
+ UniversalCompactionBuilder builder(
438
+ ioptions_, icmp_, cf_name, mutable_cf_options, mutable_db_options,
439
+ existing_snapshots, snapshot_checker, vstorage, this, log_buffer);
402
440
  return builder.PickCompaction();
403
441
  }
404
442
 
@@ -448,14 +486,20 @@ UniversalCompactionBuilder::CalculateSortedRuns(
448
486
  *max_run_size = 0;
449
487
  std::vector<UniversalCompactionBuilder::SortedRun> ret;
450
488
  for (FileMetaData* f : vstorage.LevelFiles(0)) {
451
- ret.emplace_back(0, f, f->fd.GetFileSize(), f->compensated_file_size,
452
- f->being_compacted);
489
+ if (earliest_snapshot_.has_value() && f->marked_for_compaction) {
490
+ file_marked_for_compaction_to_sorted_run_index_.emplace(f->fd.GetNumber(),
491
+ ret.size());
492
+ }
493
+ ret.emplace_back(
494
+ 0, f, f->fd.GetFileSize(), f->compensated_file_size, f->being_compacted,
495
+ f->marked_for_compaction && f->FileIsStandAloneRangeTombstone());
453
496
  *max_run_size = std::max(*max_run_size, f->fd.GetFileSize());
454
497
  }
455
498
  for (int level = 1; level <= last_level; level++) {
456
499
  uint64_t total_compensated_size = 0U;
457
500
  uint64_t total_size = 0U;
458
501
  bool being_compacted = false;
502
+ bool level_has_marked_standalone_rangedel = false;
459
503
  for (FileMetaData* f : vstorage.LevelFiles(level)) {
460
504
  total_compensated_size += f->compensated_file_size;
461
505
  total_size += f->fd.GetFileSize();
@@ -467,16 +511,57 @@ UniversalCompactionBuilder::CalculateSortedRuns(
467
511
  if (f->being_compacted) {
468
512
  being_compacted = f->being_compacted;
469
513
  }
514
+ level_has_marked_standalone_rangedel =
515
+ level_has_marked_standalone_rangedel ||
516
+ (f->marked_for_compaction && f->FileIsStandAloneRangeTombstone());
517
+ if (earliest_snapshot_.has_value() && f->marked_for_compaction) {
518
+ file_marked_for_compaction_to_sorted_run_index_.emplace(
519
+ f->fd.GetNumber(), ret.size());
520
+ }
470
521
  }
471
522
  if (total_compensated_size > 0) {
472
523
  ret.emplace_back(level, nullptr, total_size, total_compensated_size,
473
- being_compacted);
524
+ being_compacted, level_has_marked_standalone_rangedel);
474
525
  }
475
526
  *max_run_size = std::max(*max_run_size, total_size);
476
527
  }
477
528
  return ret;
478
529
  }
479
530
 
531
+ bool UniversalCompactionBuilder::ShouldSkipMarkedFile(
532
+ const FileMetaData* file) const {
533
+ assert(file->marked_for_compaction);
534
+ if (!earliest_snapshot_.has_value()) {
535
+ return false;
536
+ }
537
+ if (!file->FileIsStandAloneRangeTombstone()) {
538
+ return false;
539
+ }
540
+ // Skip until earliest snapshot advances at or above this standalone range
541
+ // tombstone file. `DB::ReleaseSnapshot` will re-examine and schedule
542
+ // compaction for it.
543
+ if (!DataIsDefinitelyInSnapshot(file->fd.largest_seqno,
544
+ earliest_snapshot_.value(),
545
+ snapshot_checker_)) {
546
+ return true;
547
+ }
548
+
549
+ auto iter = file_marked_for_compaction_to_sorted_run_index_.find(
550
+ file->fd.GetNumber());
551
+ assert(iter != file_marked_for_compaction_to_sorted_run_index_.end());
552
+ size_t idx = iter->second;
553
+ const SortedRun* succeeding_sorted_run =
554
+ idx < sorted_runs_.size() - 1 ? &sorted_runs_[idx + 1] : nullptr;
555
+ // Marked standalone range tombstone file is best used if it's in the start
556
+ // input level. Skip to let that compaction happen first.
557
+ if (succeeding_sorted_run &&
558
+ succeeding_sorted_run->level_has_marked_standalone_rangedel) {
559
+ return true;
560
+ }
561
+
562
+ return false;
563
+ }
564
+
480
565
  // Universal style of compaction. Pick files that are contiguous in
481
566
  // time-range to compact.
482
567
  Compaction* UniversalCompactionBuilder::PickCompaction() {
@@ -580,7 +665,8 @@ Compaction* UniversalCompactionBuilder::PickCompaction() {
580
665
  // Get the total number of sorted runs that are not being compacted
581
666
  int num_sr_not_compacted = 0;
582
667
  for (size_t i = 0; i < sorted_runs_.size(); i++) {
583
- if (sorted_runs_[i].being_compacted == false) {
668
+ if (sorted_runs_[i].being_compacted == false &&
669
+ !sorted_runs_[i].level_has_marked_standalone_rangedel) {
584
670
  num_sr_not_compacted++;
585
671
  }
586
672
  }
@@ -743,16 +829,24 @@ Compaction* UniversalCompactionBuilder::PickCompactionToReduceSortedRuns(
743
829
  for (sr = nullptr; loop < sorted_runs_.size(); loop++) {
744
830
  sr = &sorted_runs_[loop];
745
831
 
746
- if (!sr->being_compacted) {
832
+ if (!sr->being_compacted && !sr->level_has_marked_standalone_rangedel) {
747
833
  candidate_count = 1;
748
834
  break;
749
835
  }
750
836
  char file_num_buf[kFormatFileNumberBufSize];
751
837
  sr->Dump(file_num_buf, sizeof(file_num_buf));
752
- ROCKS_LOG_BUFFER(log_buffer_,
753
- "[%s] Universal: %s"
754
- "[%d] being compacted, skipping",
755
- cf_name_.c_str(), file_num_buf, loop);
838
+ if (sr->being_compacted) {
839
+ ROCKS_LOG_BUFFER(log_buffer_,
840
+ "[%s] Universal: %s"
841
+ "[%d] being compacted, skipping",
842
+ cf_name_.c_str(), file_num_buf, loop);
843
+ } else if (sr->level_has_marked_standalone_rangedel) {
844
+ ROCKS_LOG_BUFFER(log_buffer_,
845
+ "[%s] Universal: %s"
846
+ "[%d] has standalone range tombstone files marked for "
847
+ "compaction, skipping",
848
+ cf_name_.c_str(), file_num_buf, loop);
849
+ }
756
850
 
757
851
  sr = nullptr;
758
852
  }
@@ -773,7 +867,8 @@ Compaction* UniversalCompactionBuilder::PickCompactionToReduceSortedRuns(
773
867
  candidate_count < max_files_to_compact && i < sorted_runs_.size();
774
868
  i++) {
775
869
  const SortedRun* succeeding_sr = &sorted_runs_[i];
776
- if (succeeding_sr->being_compacted) {
870
+ if (succeeding_sr->being_compacted ||
871
+ succeeding_sr->level_has_marked_standalone_rangedel) {
777
872
  break;
778
873
  }
779
874
  // Pick files if the total/last candidate file size (increased by the
@@ -899,11 +994,11 @@ Compaction* UniversalCompactionBuilder::PickCompactionToReduceSortedRuns(
899
994
  grandparents = vstorage_->LevelFiles(sorted_runs_[first_index_after].level);
900
995
  }
901
996
 
902
- if (output_level != 0 &&
903
- picker_->FilesRangeOverlapWithCompaction(
904
- inputs, output_level,
905
- Compaction::EvaluatePenultimateLevel(vstorage_, ioptions_,
906
- start_level, output_level))) {
997
+ if (output_level != 0 && picker_->FilesRangeOverlapWithCompaction(
998
+ inputs, output_level,
999
+ Compaction::EvaluatePenultimateLevel(
1000
+ vstorage_, mutable_cf_options_, ioptions_,
1001
+ start_level, output_level))) {
907
1002
  return nullptr;
908
1003
  }
909
1004
  CompactionReason compaction_reason;
@@ -923,6 +1018,8 @@ Compaction* UniversalCompactionBuilder::PickCompactionToReduceSortedRuns(
923
1018
  output_level, enable_compression),
924
1019
  mutable_cf_options_.default_write_temperature,
925
1020
  /* max_subcompactions */ 0, grandparents,
1021
+ /* earliest_snapshot */ std::nullopt,
1022
+ /* snapshot_checker */ nullptr,
926
1023
  /* is manual */ false, /* trim_ts */ "", score_,
927
1024
  false /* deletion_compaction */,
928
1025
  /* l0_files_might_overlap */ true, compaction_reason);
@@ -939,7 +1036,8 @@ Compaction* UniversalCompactionBuilder::PickCompactionToReduceSizeAmp() {
939
1036
  const size_t end_index = ShouldSkipLastSortedRunForSizeAmpCompaction()
940
1037
  ? sorted_runs_.size() - 2
941
1038
  : sorted_runs_.size() - 1;
942
- if (sorted_runs_[end_index].being_compacted) {
1039
+ if (sorted_runs_[end_index].being_compacted ||
1040
+ sorted_runs_[end_index].level_has_marked_standalone_rangedel) {
943
1041
  return nullptr;
944
1042
  }
945
1043
  const uint64_t base_sr_size = sorted_runs_[end_index].size;
@@ -950,14 +1048,23 @@ Compaction* UniversalCompactionBuilder::PickCompactionToReduceSizeAmp() {
950
1048
  // Get longest span (i.e, [start_index, end_index]) of available sorted runs
951
1049
  while (start_index > 0) {
952
1050
  const SortedRun* sr = &sorted_runs_[start_index - 1];
953
- if (sr->being_compacted) {
1051
+ if (sr->being_compacted || sr->level_has_marked_standalone_rangedel) {
954
1052
  char file_num_buf[kFormatFileNumberBufSize];
955
1053
  sr->Dump(file_num_buf, sizeof(file_num_buf), true);
956
- ROCKS_LOG_BUFFER(
957
- log_buffer_,
958
- "[%s] Universal: stopping at sorted run undergoing compaction: "
959
- "%s[%" ROCKSDB_PRIszt "]",
960
- cf_name_.c_str(), file_num_buf, start_index - 1);
1054
+ if (sr->being_compacted) {
1055
+ ROCKS_LOG_BUFFER(
1056
+ log_buffer_,
1057
+ "[%s] Universal: stopping at sorted run undergoing compaction: "
1058
+ "%s[%" ROCKSDB_PRIszt "]",
1059
+ cf_name_.c_str(), file_num_buf, start_index - 1);
1060
+ } else if (sr->level_has_marked_standalone_rangedel) {
1061
+ ROCKS_LOG_BUFFER(
1062
+ log_buffer_,
1063
+ "[%s] Universal: stopping at sorted run that has standalone range "
1064
+ "tombstone files marked for compaction: "
1065
+ "%s[%" ROCKSDB_PRIszt "]",
1066
+ cf_name_.c_str(), file_num_buf, start_index - 1);
1067
+ }
961
1068
  break;
962
1069
  }
963
1070
  candidate_size += sr->compensated_file_size;
@@ -1236,11 +1343,11 @@ Compaction* UniversalCompactionBuilder::PickIncrementalForReduceSizeAmp(
1236
1343
  }
1237
1344
 
1238
1345
  // intra L0 compactions outputs could have overlap
1239
- if (output_level != 0 &&
1240
- picker_->FilesRangeOverlapWithCompaction(
1241
- inputs, output_level,
1242
- Compaction::EvaluatePenultimateLevel(vstorage_, ioptions_,
1243
- start_level, output_level))) {
1346
+ if (output_level != 0 && picker_->FilesRangeOverlapWithCompaction(
1347
+ inputs, output_level,
1348
+ Compaction::EvaluatePenultimateLevel(
1349
+ vstorage_, mutable_cf_options_, ioptions_,
1350
+ start_level, output_level))) {
1244
1351
  return nullptr;
1245
1352
  }
1246
1353
 
@@ -1257,7 +1364,10 @@ Compaction* UniversalCompactionBuilder::PickIncrementalForReduceSizeAmp(
1257
1364
  GetCompressionOptions(mutable_cf_options_, vstorage_, output_level,
1258
1365
  true /* enable_compression */),
1259
1366
  mutable_cf_options_.default_write_temperature,
1260
- /* max_subcompactions */ 0, /* grandparents */ {}, /* is manual */ false,
1367
+ /* max_subcompactions */ 0, /* grandparents */ {},
1368
+ /* earliest_snapshot */ std::nullopt,
1369
+ /* snapshot_checker */ nullptr,
1370
+ /* is manual */ false,
1261
1371
  /* trim_ts */ "", score_, false /* deletion_compaction */,
1262
1372
  /* l0_files_might_overlap */ true,
1263
1373
  CompactionReason::kUniversalSizeAmplification);
@@ -1288,7 +1398,7 @@ Compaction* UniversalCompactionBuilder::PickDeleteTriggeredCompaction() {
1288
1398
  continue;
1289
1399
  }
1290
1400
  FileMetaData* f = vstorage_->LevelFiles(0)[loop];
1291
- if (f->marked_for_compaction) {
1401
+ if (f->marked_for_compaction && !ShouldSkipMarkedFile(f)) {
1292
1402
  start_level_inputs.files.push_back(f);
1293
1403
  start_index =
1294
1404
  static_cast<int>(loop); // Consider this as the first candidate.
@@ -1302,7 +1412,7 @@ Compaction* UniversalCompactionBuilder::PickDeleteTriggeredCompaction() {
1302
1412
 
1303
1413
  for (size_t loop = start_index + 1; loop < sorted_runs_.size(); loop++) {
1304
1414
  SortedRun* sr = &sorted_runs_[loop];
1305
- if (sr->being_compacted) {
1415
+ if (sr->being_compacted || sr->level_has_marked_standalone_rangedel) {
1306
1416
  break;
1307
1417
  }
1308
1418
 
@@ -1321,7 +1431,10 @@ Compaction* UniversalCompactionBuilder::PickDeleteTriggeredCompaction() {
1321
1431
  // leveled. We pick one of the files marked for compaction and compact with
1322
1432
  // overlapping files in the adjacent level.
1323
1433
  picker_->PickFilesMarkedForCompaction(cf_name_, vstorage_, &start_level,
1324
- &output_level, &start_level_inputs);
1434
+ &output_level, &start_level_inputs,
1435
+ [this](const FileMetaData* file) {
1436
+ return ShouldSkipMarkedFile(file);
1437
+ });
1325
1438
  if (start_level_inputs.empty()) {
1326
1439
  return nullptr;
1327
1440
  }
@@ -1374,7 +1487,8 @@ Compaction* UniversalCompactionBuilder::PickDeleteTriggeredCompaction() {
1374
1487
  if (picker_->FilesRangeOverlapWithCompaction(
1375
1488
  inputs, output_level,
1376
1489
  Compaction::EvaluatePenultimateLevel(
1377
- vstorage_, ioptions_, start_level, output_level))) {
1490
+ vstorage_, mutable_cf_options_, ioptions_, start_level,
1491
+ output_level))) {
1378
1492
  return nullptr;
1379
1493
  }
1380
1494
 
@@ -1401,7 +1515,9 @@ Compaction* UniversalCompactionBuilder::PickDeleteTriggeredCompaction() {
1401
1515
  GetCompressionType(vstorage_, mutable_cf_options_, output_level, 1),
1402
1516
  GetCompressionOptions(mutable_cf_options_, vstorage_, output_level),
1403
1517
  mutable_cf_options_.default_write_temperature,
1404
- /* max_subcompactions */ 0, grandparents, /* is manual */ false,
1518
+ /* max_subcompactions */ 0, grandparents, earliest_snapshot_,
1519
+ snapshot_checker_,
1520
+ /* is manual */ false,
1405
1521
  /* trim_ts */ "", score_, false /* deletion_compaction */,
1406
1522
  /* l0_files_might_overlap */ true,
1407
1523
  CompactionReason::kFilesMarkedForCompaction);
@@ -1472,11 +1588,11 @@ Compaction* UniversalCompactionBuilder::PickCompactionWithSortedRunRange(
1472
1588
  }
1473
1589
 
1474
1590
  // intra L0 compactions outputs could have overlap
1475
- if (output_level != 0 &&
1476
- picker_->FilesRangeOverlapWithCompaction(
1477
- inputs, output_level,
1478
- Compaction::EvaluatePenultimateLevel(vstorage_, ioptions_,
1479
- start_level, output_level))) {
1591
+ if (output_level != 0 && picker_->FilesRangeOverlapWithCompaction(
1592
+ inputs, output_level,
1593
+ Compaction::EvaluatePenultimateLevel(
1594
+ vstorage_, mutable_cf_options_, ioptions_,
1595
+ start_level, output_level))) {
1480
1596
  return nullptr;
1481
1597
  }
1482
1598
 
@@ -1494,7 +1610,10 @@ Compaction* UniversalCompactionBuilder::PickCompactionWithSortedRunRange(
1494
1610
  GetCompressionOptions(mutable_cf_options_, vstorage_, output_level,
1495
1611
  true /* enable_compression */),
1496
1612
  mutable_cf_options_.default_write_temperature,
1497
- /* max_subcompactions */ 0, /* grandparents */ {}, /* is manual */ false,
1613
+ /* max_subcompactions */ 0, /* grandparents */ {},
1614
+ /* earliest_snapshot */ std::nullopt,
1615
+ /* snapshot_checker */ nullptr,
1616
+ /* is manual */ false,
1498
1617
  /* trim_ts */ "", score_, false /* deletion_compaction */,
1499
1618
  /* l0_files_might_overlap */ true, compaction_reason);
1500
1619
  }
@@ -1515,7 +1634,8 @@ Compaction* UniversalCompactionBuilder::PickPeriodicCompaction() {
1515
1634
  // included in the compaction.
1516
1635
 
1517
1636
  size_t start_index = sorted_runs_.size();
1518
- while (start_index > 0 && !sorted_runs_[start_index - 1].being_compacted) {
1637
+ while (start_index > 0 && !sorted_runs_[start_index - 1].being_compacted &&
1638
+ !sorted_runs_[start_index - 1].level_has_marked_standalone_rangedel) {
1519
1639
  start_index--;
1520
1640
  }
1521
1641
  if (start_index == sorted_runs_.size()) {
@@ -10,6 +10,7 @@
10
10
  #pragma once
11
11
 
12
12
  #include "db/compaction/compaction_picker.h"
13
+ #include "db/snapshot_checker.h"
13
14
 
14
15
  namespace ROCKSDB_NAMESPACE {
15
16
  class UniversalCompactionPicker : public CompactionPicker {
@@ -17,11 +18,12 @@ class UniversalCompactionPicker : public CompactionPicker {
17
18
  UniversalCompactionPicker(const ImmutableOptions& ioptions,
18
19
  const InternalKeyComparator* icmp)
19
20
  : CompactionPicker(ioptions, icmp) {}
20
- Compaction* PickCompaction(const std::string& cf_name,
21
- const MutableCFOptions& mutable_cf_options,
22
- const MutableDBOptions& mutable_db_options,
23
- VersionStorageInfo* vstorage,
24
- LogBuffer* log_buffer) override;
21
+ Compaction* PickCompaction(
22
+ const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
23
+ const MutableDBOptions& mutable_db_options,
24
+ const std::vector<SequenceNumber>& existing_snapshots,
25
+ const SnapshotChecker* snapshot_checker, VersionStorageInfo* vstorage,
26
+ LogBuffer* log_buffer) override;
25
27
  int MaxOutputLevel() const override { return NumberLevels() - 1; }
26
28
 
27
29
  bool NeedsCompaction(const VersionStorageInfo* vstorage) const override;
@@ -212,6 +212,7 @@ CompactionJob::ProcessKeyValueCompactionWithCompactionService(
212
212
  sub_compact->Current().AddOutput(std::move(meta),
213
213
  cfd->internal_comparator(), false, true,
214
214
  file.paranoid_hash);
215
+ sub_compact->Current().UpdateTableProperties(file.table_properties);
215
216
  }
216
217
  sub_compact->compaction_job_stats = compaction_result.stats;
217
218
  sub_compact->Current().SetNumOutputRecords(
@@ -375,15 +376,18 @@ Status CompactionServiceCompactionJob::Run() {
375
376
  // Build Output
376
377
  compaction_result_->output_level = compact_->compaction->output_level();
377
378
  compaction_result_->output_path = output_path_;
378
- for (const auto& output_file : sub_compact->GetOutputs()) {
379
- auto& meta = output_file.meta;
380
- compaction_result_->output_files.emplace_back(
381
- MakeTableFileName(meta.fd.GetNumber()), meta.fd.smallest_seqno,
382
- meta.fd.largest_seqno, meta.smallest.Encode().ToString(),
383
- meta.largest.Encode().ToString(), meta.oldest_ancester_time,
384
- meta.file_creation_time, meta.epoch_number, meta.file_checksum,
385
- meta.file_checksum_func_name, output_file.validator.GetHash(),
386
- meta.marked_for_compaction, meta.unique_id);
379
+ if (status.ok()) {
380
+ for (const auto& output_file : sub_compact->GetOutputs()) {
381
+ auto& meta = output_file.meta;
382
+ compaction_result_->output_files.emplace_back(
383
+ MakeTableFileName(meta.fd.GetNumber()), meta.fd.smallest_seqno,
384
+ meta.fd.largest_seqno, meta.smallest.Encode().ToString(),
385
+ meta.largest.Encode().ToString(), meta.oldest_ancester_time,
386
+ meta.file_creation_time, meta.epoch_number, meta.file_checksum,
387
+ meta.file_checksum_func_name, output_file.validator.GetHash(),
388
+ meta.marked_for_compaction, meta.unique_id,
389
+ *output_file.table_properties);
390
+ }
387
391
  }
388
392
 
389
393
  TEST_SYNC_POINT_CALLBACK("CompactionServiceCompactionJob::Run:0",
@@ -531,7 +535,30 @@ static std::unordered_map<std::string, OptionTypeInfo>
531
535
  offsetof(struct CompactionServiceOutputFile, unique_id),
532
536
  OptionVerificationType::kNormal, OptionTypeFlags::kNone,
533
537
  {0, OptionType::kUInt64T})},
534
- };
538
+ {"table_properties",
539
+ {offsetof(struct CompactionServiceOutputFile, table_properties),
540
+ OptionType::kStruct, OptionVerificationType::kNormal,
541
+ OptionTypeFlags::kNone,
542
+ [](const ConfigOptions& opts, const std::string& /*name*/,
543
+ const std::string& value, void* addr) {
544
+ auto table_properties = static_cast<TableProperties*>(addr);
545
+ return TableProperties::Parse(opts, value, table_properties);
546
+ },
547
+ [](const ConfigOptions& opts, const std::string& /*name*/,
548
+ const void* addr, std::string* value) {
549
+ const auto table_properties =
550
+ static_cast<const TableProperties*>(addr);
551
+ std::string result;
552
+ auto status = table_properties->Serialize(opts, &result);
553
+ *value = "{" + result + "}";
554
+ return status;
555
+ },
556
+ [](const ConfigOptions& opts, const std::string& /*name*/,
557
+ const void* addr1, const void* addr2, std::string* mismatch) {
558
+ const auto this_one = static_cast<const TableProperties*>(addr1);
559
+ const auto that_one = static_cast<const TableProperties*>(addr2);
560
+ return this_one->AreEqual(opts, that_one, mismatch);
561
+ }}}};
535
562
 
536
563
  static std::unordered_map<std::string, OptionTypeInfo>
537
564
  compaction_job_stats_type_info = {
@@ -416,6 +416,38 @@ TEST_F(CompactionServiceTest, ManualCompaction) {
416
416
  ASSERT_TRUE(result.stats.is_remote_compaction);
417
417
  }
418
418
 
419
+ TEST_F(CompactionServiceTest, CompactionOutputFileIOError) {
420
+ Options options = CurrentOptions();
421
+ options.disable_auto_compactions = true;
422
+ ReopenWithCompactionService(&options);
423
+ GenerateTestData();
424
+
425
+ auto my_cs = GetCompactionService();
426
+
427
+ SyncPoint::GetInstance()->SetCallBack(
428
+ "CompactionJob::FinishCompactionOutputFile()::AfterFinish",
429
+ [&](void* status) {
430
+ // override status
431
+ auto s = static_cast<Status*>(status);
432
+ *s = Status::IOError("Injected IOError!");
433
+ });
434
+ SyncPoint::GetInstance()->EnableProcessing();
435
+
436
+ std::string start_str = Key(15);
437
+ std::string end_str = Key(45);
438
+ Slice start(start_str);
439
+ Slice end(end_str);
440
+ uint64_t comp_num = my_cs->GetCompactionNum();
441
+ ASSERT_NOK(db_->CompactRange(CompactRangeOptions(), &start, &end));
442
+ ASSERT_GE(my_cs->GetCompactionNum(), comp_num + 1);
443
+
444
+ CompactionServiceResult result;
445
+ my_cs->GetResult(&result);
446
+ ASSERT_NOK(result.status);
447
+ ASSERT_TRUE(result.stats.is_manual_compaction);
448
+ ASSERT_TRUE(result.stats.is_remote_compaction);
449
+ }
450
+
419
451
  TEST_F(CompactionServiceTest, PreservedOptionsLocalCompaction) {
420
452
  Options options = CurrentOptions();
421
453
  options.level0_file_num_compaction_trigger = 2;
@@ -483,21 +515,29 @@ TEST_F(CompactionServiceTest, PreservedOptionsRemoteCompaction) {
483
515
  ASSERT_OK(Flush());
484
516
  }
485
517
 
486
- bool is_primary_called = false;
487
- // This will be called twice. One from primary and one from remote.
488
- // Try changing the option when called from remote. Otherwise, the new option
489
- // will be used
518
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
519
+ {{"CompactionServiceTest::OptionsFileChanged",
520
+ "DBImplSecondary::OpenAndCompact::BeforeLoadingOptions:1"}});
521
+
490
522
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
491
- "DBImpl::BackgroundCompaction:NonTrivial:BeforeRun", [&](void* /*arg*/) {
492
- if (!is_primary_called) {
493
- is_primary_called = true;
494
- return;
495
- }
496
- // Change the option right before the compaction run
523
+ "DBImplSecondary::OpenAndCompact::BeforeLoadingOptions:0",
524
+ [&](void* arg) {
525
+ auto options_file_number = static_cast<uint64_t*>(arg);
526
+ // Change the option twice before the compaction run
497
527
  ASSERT_OK(dbfull()->SetOptions(
498
528
  {{"level0_file_num_compaction_trigger", "4"}}));
499
529
  ASSERT_EQ(4, dbfull()->GetOptions().level0_file_num_compaction_trigger);
500
- dbfull()->TEST_DeleteObsoleteFiles();
530
+ ASSERT_TRUE(dbfull()->versions_->options_file_number() >
531
+ *options_file_number);
532
+
533
+ // Change the option twice before the compaction run
534
+ ASSERT_OK(dbfull()->SetOptions(
535
+ {{"level0_file_num_compaction_trigger", "5"}}));
536
+ ASSERT_EQ(5, dbfull()->GetOptions().level0_file_num_compaction_trigger);
537
+ ASSERT_TRUE(dbfull()->versions_->options_file_number() >
538
+ *options_file_number);
539
+
540
+ TEST_SYNC_POINT("CompactionServiceTest::OptionsFileChanged");
501
541
  });
502
542
 
503
543
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
@@ -34,9 +34,16 @@ void SubcompactionState::Cleanup(Cache* cache) {
34
34
 
35
35
  if (!status.ok()) {
36
36
  for (const auto& out : GetOutputs()) {
37
- // If this file was inserted into the table cache then remove
38
- // them here because this compaction was not committed.
39
- TableCache::Evict(cache, out.meta.fd.GetNumber());
37
+ // If this file was inserted into the table cache then remove it here
38
+ // because this compaction was not committed. This is not strictly
39
+ // required because of a backstop TableCache::Evict() in
40
+ // PurgeObsoleteFiles() but is our opportunity to apply
41
+ // uncache_aggressiveness. TODO: instead, put these files into the
42
+ // VersionSet::obsolete_files_ pipeline so that they don't have to
43
+ // be picked up by scanning the DB directory.
44
+ TableCache::ReleaseObsolete(
45
+ cache, out.meta.fd.GetNumber(), nullptr /*handle*/,
46
+ compaction->mutable_cf_options()->uncache_aggressiveness);
40
47
  }
41
48
  }
42
49
  // TODO: sub_compact.io_status is not checked like status. Not sure if thats