@nxtedition/rocksdb 13.1.5 → 13.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. package/binding.cc +62 -15
  2. package/deps/rocksdb/rocksdb/{TARGETS → BUCK} +27 -0
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +3 -1
  4. package/deps/rocksdb/rocksdb/Makefile +2 -2
  5. package/deps/rocksdb/rocksdb/cache/cache.cc +3 -1
  6. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +2 -0
  7. package/deps/rocksdb/rocksdb/db/attribute_group_iterator_impl.h +34 -9
  8. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +7 -6
  9. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +5 -1
  10. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +22 -14
  11. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +149 -0
  12. package/deps/rocksdb/rocksdb/db/builder.cc +13 -24
  13. package/deps/rocksdb/rocksdb/db/coalescing_iterator.h +35 -10
  14. package/deps/rocksdb/rocksdb/db/column_family.cc +21 -10
  15. package/deps/rocksdb/rocksdb/db/column_family.h +15 -8
  16. package/deps/rocksdb/rocksdb/db/column_family_test.cc +98 -7
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +126 -16
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +51 -5
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +2 -2
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -8
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +24 -0
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +52 -22
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +9 -7
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +36 -9
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +6 -0
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +30 -17
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +26 -23
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +43 -33
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +6 -5
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +19 -9
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +6 -5
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +632 -411
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +171 -51
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +7 -5
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +37 -10
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +51 -11
  37. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +10 -3
  38. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +350 -154
  39. package/deps/rocksdb/rocksdb/db/convenience.cc +1 -1
  40. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +62 -27
  41. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +68 -1
  42. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +91 -0
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +134 -70
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +71 -23
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +43 -16
  46. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +47 -33
  47. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +27 -19
  48. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +38 -25
  49. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -3
  50. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +7 -4
  51. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +258 -42
  52. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +161 -9
  53. package/deps/rocksdb/rocksdb/db/db_iter.cc +118 -86
  54. package/deps/rocksdb/rocksdb/db/db_iter.h +44 -17
  55. package/deps/rocksdb/rocksdb/db/db_options_test.cc +27 -6
  56. package/deps/rocksdb/rocksdb/db/db_test.cc +48 -16
  57. package/deps/rocksdb/rocksdb/db/db_test2.cc +60 -15
  58. package/deps/rocksdb/rocksdb/db/db_test_util.cc +97 -44
  59. package/deps/rocksdb/rocksdb/db/db_test_util.h +7 -1
  60. package/deps/rocksdb/rocksdb/db/dbformat.cc +15 -5
  61. package/deps/rocksdb/rocksdb/db/dbformat.h +137 -55
  62. package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
  63. package/deps/rocksdb/rocksdb/db/experimental.cc +54 -0
  64. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +663 -8
  65. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +152 -91
  66. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +134 -11
  67. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +55 -9
  68. package/deps/rocksdb/rocksdb/db/flush_job.cc +52 -29
  69. package/deps/rocksdb/rocksdb/db/flush_job.h +5 -3
  70. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +18 -12
  71. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +23 -29
  72. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +3 -2
  73. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +2 -0
  74. package/deps/rocksdb/rocksdb/db/internal_stats.cc +9 -6
  75. package/deps/rocksdb/rocksdb/db/internal_stats.h +54 -0
  76. package/deps/rocksdb/rocksdb/db/job_context.h +1 -1
  77. package/deps/rocksdb/rocksdb/db/log_reader.cc +6 -7
  78. package/deps/rocksdb/rocksdb/db/manifest_ops.cc +47 -0
  79. package/deps/rocksdb/rocksdb/db/manifest_ops.h +20 -0
  80. package/deps/rocksdb/rocksdb/db/memtable.cc +165 -64
  81. package/deps/rocksdb/rocksdb/db/memtable.h +422 -243
  82. package/deps/rocksdb/rocksdb/db/memtable_list.cc +99 -68
  83. package/deps/rocksdb/rocksdb/db/memtable_list.h +63 -38
  84. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +28 -25
  85. package/deps/rocksdb/rocksdb/db/multi_cf_iterator_impl.h +118 -60
  86. package/deps/rocksdb/rocksdb/db/multi_cf_iterator_test.cc +344 -89
  87. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +2 -3
  88. package/deps/rocksdb/rocksdb/db/repair.cc +15 -14
  89. package/deps/rocksdb/rocksdb/db/repair_test.cc +0 -13
  90. package/deps/rocksdb/rocksdb/db/snapshot_checker.h +7 -0
  91. package/deps/rocksdb/rocksdb/db/table_cache.cc +62 -65
  92. package/deps/rocksdb/rocksdb/db/table_cache.h +70 -76
  93. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +5 -6
  94. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
  95. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +8 -7
  96. package/deps/rocksdb/rocksdb/db/version_builder.cc +17 -19
  97. package/deps/rocksdb/rocksdb/db/version_builder.h +13 -12
  98. package/deps/rocksdb/rocksdb/db/version_edit.h +30 -0
  99. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +3 -5
  100. package/deps/rocksdb/rocksdb/db/version_set.cc +89 -129
  101. package/deps/rocksdb/rocksdb/db/version_set.h +12 -4
  102. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -2
  103. package/deps/rocksdb/rocksdb/db/version_set_test.cc +12 -8
  104. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +0 -15
  105. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +0 -2
  106. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +9 -7
  107. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +0 -8
  108. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.h +28 -2
  109. package/deps/rocksdb/rocksdb/db/write_batch.cc +32 -10
  110. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +9 -0
  111. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +2 -1
  112. package/deps/rocksdb/rocksdb/db/write_thread.cc +3 -1
  113. package/deps/rocksdb/rocksdb/db/write_thread.h +6 -2
  114. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +15 -0
  115. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +7 -0
  116. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  117. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +18 -2
  118. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +100 -22
  119. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -4
  120. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +34 -8
  121. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +223 -78
  122. package/deps/rocksdb/rocksdb/env/file_system.cc +6 -1
  123. package/deps/rocksdb/rocksdb/env/fs_posix.cc +53 -0
  124. package/deps/rocksdb/rocksdb/env/io_posix.cc +63 -17
  125. package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
  126. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +132 -48
  127. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +92 -24
  128. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +727 -109
  129. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +3 -4
  130. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
  131. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +8 -0
  132. package/deps/rocksdb/rocksdb/include/rocksdb/attribute_groups.h +20 -1
  133. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +9 -0
  134. package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +9 -5
  135. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +2 -0
  136. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +10 -2
  137. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -0
  138. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +7 -0
  139. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +34 -37
  140. package/deps/rocksdb/rocksdb/include/rocksdb/iterator_base.h +21 -0
  141. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +56 -28
  142. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -0
  143. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +36 -28
  144. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +11 -0
  145. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
  146. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +84 -60
  147. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/secondary_index.h +102 -0
  148. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +89 -2
  149. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +32 -0
  150. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +30 -1
  151. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +23 -2
  152. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  153. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -0
  154. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +79 -21
  155. package/deps/rocksdb/rocksdb/memtable/skiplist.h +41 -18
  156. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +1 -5
  157. package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.cc +169 -0
  158. package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.h +400 -0
  159. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -0
  160. package/deps/rocksdb/rocksdb/options/cf_options.cc +137 -82
  161. package/deps/rocksdb/rocksdb/options/cf_options.h +18 -6
  162. package/deps/rocksdb/rocksdb/options/configurable.cc +31 -17
  163. package/deps/rocksdb/rocksdb/options/configurable_helper.h +7 -6
  164. package/deps/rocksdb/rocksdb/options/options_helper.cc +10 -8
  165. package/deps/rocksdb/rocksdb/options/options_parser.cc +74 -54
  166. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +89 -0
  167. package/deps/rocksdb/rocksdb/options/options_test.cc +112 -26
  168. package/deps/rocksdb/rocksdb/port/port.h +5 -9
  169. package/deps/rocksdb/rocksdb/src.mk +8 -0
  170. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +4 -0
  171. package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -7
  172. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -0
  173. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +62 -80
  174. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +13 -3
  175. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +16 -5
  176. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +38 -7
  177. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +12 -4
  178. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -1
  179. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +4 -1
  180. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +204 -1
  181. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -3
  182. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
  183. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +4 -0
  184. package/deps/rocksdb/rocksdb/table/format.cc +3 -3
  185. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +4 -1
  186. package/deps/rocksdb/rocksdb/table/mock_table.cc +0 -50
  187. package/deps/rocksdb/rocksdb/table/mock_table.h +53 -0
  188. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +4 -0
  189. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
  190. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +10 -5
  191. package/deps/rocksdb/rocksdb/table/table_builder.h +3 -1
  192. package/deps/rocksdb/rocksdb/table/table_properties.cc +181 -0
  193. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +5 -5
  194. package/deps/rocksdb/rocksdb/table/table_test.cc +71 -64
  195. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +45 -45
  196. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +35 -35
  197. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +43 -43
  198. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +41 -4
  199. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +1 -0
  200. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +1 -1
  201. package/deps/rocksdb/rocksdb/unreleased_history/add.sh +13 -0
  202. package/deps/rocksdb/rocksdb/util/aligned_buffer.h +24 -5
  203. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +7 -0
  204. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +0 -52
  205. package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +1 -10
  206. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +92 -0
  207. package/deps/rocksdb/rocksdb/util/thread_operation.h +1 -0
  208. package/deps/rocksdb/rocksdb/util/udt_util.cc +50 -4
  209. package/deps/rocksdb/rocksdb/util/udt_util.h +24 -11
  210. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +26 -13
  211. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +1 -16
  212. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +2 -0
  213. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.cc +214 -0
  214. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.h +60 -0
  215. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index_test.cc +124 -0
  216. package/deps/rocksdb/rocksdb/utilities/secondary_index/secondary_index_mixin.h +441 -0
  217. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +34 -3
  218. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +7 -2
  219. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +437 -0
  220. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +34 -11
  221. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +14 -7
  222. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +7 -1
  223. package/deps/rocksdb/rocksdb/utilities/transactions/snapshot_checker.cc +17 -0
  224. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +69 -0
  225. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +20 -0
  226. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +1290 -0
  227. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +324 -0
  228. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -1
  229. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +8 -1
  230. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +57 -12
  231. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +32 -3
  232. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +33 -2
  233. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +721 -9
  234. package/deps/rocksdb/rocksdb.gyp +2 -0
  235. package/package.json +1 -1
  236. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  237. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
@@ -8,6 +8,8 @@
8
8
  // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
9
 
10
10
  #pragma once
11
+
12
+ #include "db/snapshot_checker.h"
11
13
  #include "db/version_set.h"
12
14
  #include "memory/arena.h"
13
15
  #include "options/cf_options.h"
@@ -90,6 +92,8 @@ class Compaction {
90
92
  CompressionOptions compression_opts,
91
93
  Temperature output_temperature, uint32_t max_subcompactions,
92
94
  std::vector<FileMetaData*> grandparents,
95
+ std::optional<SequenceNumber> earliest_snapshot,
96
+ const SnapshotChecker* snapshot_checker,
93
97
  bool manual_compaction = false, const std::string& trim_ts = "",
94
98
  double score = -1, bool deletion_compaction = false,
95
99
  bool l0_files_might_overlap = true,
@@ -180,6 +184,16 @@ class Compaction {
180
184
  return &input_levels_[compaction_input_level];
181
185
  }
182
186
 
187
+ // Returns the filtered input files of the specified compaction input level.
188
+ // For now, only non start level is filtered.
189
+ const std::vector<FileMetaData*>& filtered_input_levels(
190
+ size_t compaction_input_level) const {
191
+ const std::vector<FileMetaData*>& filtered_input_level =
192
+ filtered_input_levels_[compaction_input_level];
193
+ assert(compaction_input_level != 0 || filtered_input_level.size() == 0);
194
+ return filtered_input_level;
195
+ }
196
+
183
197
  // Maximum size of files to build during this compaction.
184
198
  uint64_t max_output_file_size() const { return max_output_file_size_; }
185
199
 
@@ -401,6 +415,12 @@ class Compaction {
401
415
  return blob_garbage_collection_age_cutoff_;
402
416
  }
403
417
 
418
+ // start and end are sub compact range. Null if no boundary.
419
+ // This is used to calculate the newest_key_time table property after
420
+ // compaction.
421
+ uint64_t MaxInputFileNewestKeyTime(const InternalKey* start,
422
+ const InternalKey* end) const;
423
+
404
424
  // start and end are sub compact range. Null if no boundary.
405
425
  // This is used to filter out some input files' ancester's time range.
406
426
  uint64_t MinInputFileOldestAncesterTime(const InternalKey* start,
@@ -430,10 +450,11 @@ class Compaction {
430
450
  // penultimate level. The safe key range is populated by
431
451
  // `PopulatePenultimateLevelOutputRange()`.
432
452
  // Which could potentially disable all penultimate level output.
433
- static int EvaluatePenultimateLevel(const VersionStorageInfo* vstorage,
434
- const ImmutableOptions& immutable_options,
435
- const int start_level,
436
- const int output_level);
453
+ static int EvaluatePenultimateLevel(
454
+ const VersionStorageInfo* vstorage,
455
+ const MutableCFOptions& mutable_cf_options,
456
+ const ImmutableOptions& immutable_options, const int start_level,
457
+ const int output_level);
437
458
 
438
459
  // mark (or clear) all files that are being compacted
439
460
  void MarkFilesBeingCompacted(bool being_compacted) const;
@@ -460,6 +481,13 @@ class Compaction {
460
481
  // `Compaction::WithinPenultimateLevelOutputRange()`.
461
482
  void PopulatePenultimateLevelOutputRange();
462
483
 
484
+ // If oldest snapshot is specified at Compaction construction time, we have
485
+ // an opportunity to optimize inputs for compaction iterator for this case:
486
+ // When a standalone range deletion file on the start level is recognized and
487
+ // can be determined to completely shadow some input files on non-start level.
488
+ // These files will be filtered out and later not feed to compaction iterator.
489
+ void FilterInputsForCompactionIterator();
490
+
463
491
  // Get the atomic file boundaries for all files in the compaction. Necessary
464
492
  // in order to avoid the scenario described in
465
493
  // https://github.com/facebook/rocksdb/pull/4432#discussion_r221072219 and
@@ -510,12 +538,30 @@ class Compaction {
510
538
  // Compaction input files organized by level. Constant after construction
511
539
  const std::vector<CompactionInputFiles> inputs_;
512
540
 
513
- // A copy of inputs_, organized more closely in memory
541
+ // All files from inputs_ that are not filtered and will be fed to compaction
542
+ // iterator, organized more closely in memory.
514
543
  autovector<LevelFilesBrief, 2> input_levels_;
515
544
 
516
545
  // State used to check for number of overlapping grandparent files
517
546
  // (grandparent == "output_level_ + 1")
518
547
  std::vector<FileMetaData*> grandparents_;
548
+
549
+ // The earliest snapshot and snapshot checker at compaction picking time.
550
+ // These fields are only set for deletion triggered compactions picked in
551
+ // universal compaction. And when user-defined timestamp is not enabled.
552
+ // It will be used to possibly filter out some non start level input files.
553
+ std::optional<SequenceNumber> earliest_snapshot_;
554
+ const SnapshotChecker* snapshot_checker_;
555
+
556
+ // Markers for which non start level input files are filtered out if
557
+ // applicable. Only applicable if earliest_snapshot_ is provided and input
558
+ // start level has a standalone range deletion file. Filtered files are
559
+ // tracked in `filtered_input_levels_`.
560
+ std::vector<std::vector<bool>> non_start_level_input_files_filtered_;
561
+
562
+ // All files from inputs_ that are filtered.
563
+ std::vector<std::vector<FileMetaData*>> filtered_input_levels_;
564
+
519
565
  const double score_; // score that was used to pick this compaction.
520
566
 
521
567
  // Is this compaction creating a file in the bottom most level?
@@ -872,8 +872,8 @@ void CompactionIterator::NextFromInput() {
872
872
  if (Valid()) {
873
873
  at_next_ = true;
874
874
  }
875
- } else if (last_snapshot == current_user_key_snapshot_ ||
876
- (last_snapshot > 0 &&
875
+ } else if (last_sequence != kMaxSequenceNumber &&
876
+ (last_snapshot == current_user_key_snapshot_ ||
877
877
  last_snapshot < current_user_key_snapshot_)) {
878
878
  // If the earliest snapshot is which this key is visible in
879
879
  // is the same as the visibility of a previous instance of the
@@ -540,18 +540,12 @@ class CompactionIterator {
540
540
 
541
541
  inline bool CompactionIterator::DefinitelyInSnapshot(SequenceNumber seq,
542
542
  SequenceNumber snapshot) {
543
- return ((seq) <= (snapshot) &&
544
- (snapshot_checker_ == nullptr ||
545
- LIKELY(snapshot_checker_->CheckInSnapshot((seq), (snapshot)) ==
546
- SnapshotCheckerResult::kInSnapshot)));
543
+ return DataIsDefinitelyInSnapshot(seq, snapshot, snapshot_checker_);
547
544
  }
548
545
 
549
546
  inline bool CompactionIterator::DefinitelyNotInSnapshot(
550
547
  SequenceNumber seq, SequenceNumber snapshot) {
551
- return ((seq) > (snapshot) ||
552
- (snapshot_checker_ != nullptr &&
553
- UNLIKELY(snapshot_checker_->CheckInSnapshot((seq), (snapshot)) ==
554
- SnapshotCheckerResult::kNotInSnapshot)));
548
+ return DataIsDefinitelyNotInSnapshot(seq, snapshot, snapshot_checker_);
555
549
  }
556
550
 
557
551
  } // namespace ROCKSDB_NAMESPACE
@@ -833,6 +833,14 @@ TEST_P(CompactionIteratorTest, ConvertToPutAtBottom) {
833
833
  true /*bottomost_level*/);
834
834
  }
835
835
 
836
+ TEST_P(CompactionIteratorTest, ZeroSeqOfKeyAndSnapshot) {
837
+ AddSnapshot(0);
838
+ const std::vector<std::string> input_keys = {
839
+ test::KeyStr("a", 0, kTypeValue), test::KeyStr("b", 0, kTypeValue)};
840
+ const std::vector<std::string> input_values = {"a1", "b1"};
841
+ RunTest(input_keys, input_values, input_keys, input_values);
842
+ }
843
+
836
844
  INSTANTIATE_TEST_CASE_P(CompactionIteratorTestInstance, CompactionIteratorTest,
837
845
  testing::Values(true, false));
838
846
 
@@ -1846,6 +1854,22 @@ TEST_P(CompactionIteratorTsGcTest, SingleDeleteAllKeysOlderThanThreshold) {
1846
1854
  }
1847
1855
  }
1848
1856
 
1857
+ TEST_P(CompactionIteratorTsGcTest, ZeroSeqOfKeyAndSnapshot) {
1858
+ AddSnapshot(0);
1859
+ std::string full_history_ts_low;
1860
+ PutFixed64(&full_history_ts_low, std::numeric_limits<uint64_t>::max());
1861
+ const std::vector<std::string> input_keys = {
1862
+ test::KeyStr(101, "a", 0, kTypeValue),
1863
+ test::KeyStr(102, "b", 0, kTypeValue)};
1864
+ const std::vector<std::string> input_values = {"a1", "b1"};
1865
+ RunTest(input_keys, input_values, input_keys, input_values,
1866
+ /*last_committed_seq=*/kMaxSequenceNumber,
1867
+ /*merge_operator=*/nullptr, /*compaction_filter=*/nullptr,
1868
+ /*bottommost_level=*/false,
1869
+ /*earliest_write_conflict_snapshot=*/kMaxSequenceNumber,
1870
+ /*key_not_exists_beyond_output_level=*/false, &full_history_ts_low);
1871
+ }
1872
+
1849
1873
  INSTANTIATE_TEST_CASE_P(CompactionIteratorTsGcTestInstance,
1850
1874
  CompactionIteratorTsGcTest,
1851
1875
  testing::Values(true, false));
@@ -288,8 +288,8 @@ void CompactionJob::Prepare() {
288
288
  // to encode seqno->time to the output files.
289
289
 
290
290
  uint64_t preserve_time_duration =
291
- std::max(c->immutable_options()->preserve_internal_time_seconds,
292
- c->immutable_options()->preclude_last_level_data_seconds);
291
+ std::max(c->mutable_cf_options()->preserve_internal_time_seconds,
292
+ c->mutable_cf_options()->preclude_last_level_data_seconds);
293
293
 
294
294
  if (preserve_time_duration > 0) {
295
295
  const ReadOptions read_options(Env::IOActivity::kCompaction);
@@ -326,8 +326,8 @@ void CompactionJob::Prepare() {
326
326
  seqno_to_time_mapping_.Enforce(_current_time);
327
327
  seqno_to_time_mapping_.GetCurrentTieringCutoffSeqnos(
328
328
  static_cast<uint64_t>(_current_time),
329
- c->immutable_options()->preserve_internal_time_seconds,
330
- c->immutable_options()->preclude_last_level_data_seconds,
329
+ c->mutable_cf_options()->preserve_internal_time_seconds,
330
+ c->mutable_cf_options()->preclude_last_level_data_seconds,
331
331
  &preserve_time_min_seqno_, &preclude_last_level_min_seqno_);
332
332
  }
333
333
  // For accuracy of the GetProximalSeqnoBeforeTime queries above, we only
@@ -469,7 +469,7 @@ void CompactionJob::GenSubcompactionBoundaries() {
469
469
  ReadOptions read_options(Env::IOActivity::kCompaction);
470
470
  read_options.rate_limiter_priority = GetRateLimiterPriority();
471
471
  auto* c = compact_->compaction;
472
- if (c->immutable_options()->table_factory->Name() ==
472
+ if (c->mutable_cf_options()->table_factory->Name() ==
473
473
  TableFactory::kPlainTableName()) {
474
474
  return;
475
475
  }
@@ -506,9 +506,7 @@ void CompactionJob::GenSubcompactionBoundaries() {
506
506
  FileMetaData* f = flevel->files[i].file_metadata;
507
507
  std::vector<TableReader::Anchor> my_anchors;
508
508
  Status s = cfd->table_cache()->ApproximateKeyAnchors(
509
- read_options, icomp, *f,
510
- c->mutable_cf_options()->block_protection_bytes_per_key,
511
- my_anchors);
509
+ read_options, icomp, *f, *c->mutable_cf_options(), my_anchors);
512
510
  if (!s.ok() || my_anchors.empty()) {
513
511
  my_anchors.emplace_back(f->largest.user_key(), f->fd.GetFileSize());
514
512
  }
@@ -711,8 +709,6 @@ Status CompactionJob::Run() {
711
709
  }
712
710
  }
713
711
  ColumnFamilyData* cfd = compact_->compaction->column_family_data();
714
- auto& prefix_extractor =
715
- compact_->compaction->mutable_cf_options()->prefix_extractor;
716
712
  std::atomic<size_t> next_file_idx(0);
717
713
  auto verify_table = [&](Status& output_status) {
718
714
  while (true) {
@@ -733,7 +729,8 @@ Status CompactionJob::Run() {
733
729
  InternalIterator* iter = cfd->table_cache()->NewIterator(
734
730
  verify_table_read_options, file_options_,
735
731
  cfd->internal_comparator(), files_output[file_idx]->meta,
736
- /*range_del_agg=*/nullptr, prefix_extractor,
732
+ /*range_del_agg=*/nullptr,
733
+ *compact_->compaction->mutable_cf_options(),
737
734
  /*table_reader_ptr=*/nullptr,
738
735
  cfd->internal_stats()->GetFileReadHist(
739
736
  compact_->compaction->output_level()),
@@ -743,9 +740,7 @@ Status CompactionJob::Run() {
743
740
  *compact_->compaction->mutable_cf_options()),
744
741
  /*smallest_compaction_key=*/nullptr,
745
742
  /*largest_compaction_key=*/nullptr,
746
- /*allow_unprepared_value=*/false,
747
- compact_->compaction->mutable_cf_options()
748
- ->block_protection_bytes_per_key);
743
+ /*allow_unprepared_value=*/false);
749
744
  auto s = iter->status();
750
745
 
751
746
  if (s.ok() && paranoid_file_checks_) {
@@ -916,19 +911,23 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options,
916
911
  ROCKS_LOG_BUFFER(
917
912
  log_buffer_,
918
913
  "[%s] compacted to: %s, MB/sec: %.1f rd, %.1f wr, level %d, "
919
- "files in(%d, %d) out(%d +%d blob) "
920
- "MB in(%.1f, %.1f +%.1f blob) out(%.1f +%.1f blob), "
914
+ "files in(%d, %d) filtered(%d, %d) out(%d +%d blob) "
915
+ "MB in(%.1f, %.1f +%.1f blob) filtered(%.1f, %.1f) out(%.1f +%.1f blob), "
921
916
  "read-write-amplify(%.1f) write-amplify(%.1f) %s, records in: %" PRIu64
922
917
  ", records dropped: %" PRIu64 " output_compression: %s\n",
923
918
  column_family_name.c_str(), vstorage->LevelSummary(&tmp),
924
919
  bytes_read_per_sec, bytes_written_per_sec,
925
920
  compact_->compaction->output_level(),
926
921
  stats.num_input_files_in_non_output_levels,
927
- stats.num_input_files_in_output_level, stats.num_output_files,
922
+ stats.num_input_files_in_output_level,
923
+ stats.num_filtered_input_files_in_non_output_levels,
924
+ stats.num_filtered_input_files_in_output_level, stats.num_output_files,
928
925
  stats.num_output_files_blob, stats.bytes_read_non_output_levels / kMB,
929
926
  stats.bytes_read_output_level / kMB, stats.bytes_read_blob / kMB,
930
- stats.bytes_written / kMB, stats.bytes_written_blob / kMB, read_write_amp,
931
- write_amp, status.ToString().c_str(), stats.num_input_records,
927
+ stats.bytes_skipped_non_output_levels / kMB,
928
+ stats.bytes_skipped_output_level / kMB, stats.bytes_written / kMB,
929
+ stats.bytes_written_blob / kMB, read_write_amp, write_amp,
930
+ status.ToString().c_str(), stats.num_input_records,
932
931
  stats.num_dropped_records,
933
932
  CompressionTypeToString(compact_->compaction->output_compression())
934
933
  .c_str());
@@ -1587,6 +1586,8 @@ Status CompactionJob::FinishCompactionOutputFile(
1587
1586
  const uint64_t current_entries = outputs.NumEntries();
1588
1587
 
1589
1588
  s = outputs.Finish(s, seqno_to_time_mapping_);
1589
+ TEST_SYNC_POINT_CALLBACK(
1590
+ "CompactionJob::FinishCompactionOutputFile()::AfterFinish", &s);
1590
1591
 
1591
1592
  if (s.ok()) {
1592
1593
  // With accurate smallest and largest key, we can get a slightly more
@@ -1919,6 +1920,10 @@ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact,
1919
1920
  oldest_ancester_time = current_time;
1920
1921
  }
1921
1922
 
1923
+ uint64_t newest_key_time = sub_compact->compaction->MaxInputFileNewestKeyTime(
1924
+ sub_compact->start.has_value() ? &tmp_start : nullptr,
1925
+ sub_compact->end.has_value() ? &tmp_end : nullptr);
1926
+
1922
1927
  // Initialize a SubcompactionState::Output and add it to sub_compact->outputs
1923
1928
  uint64_t epoch_number = sub_compact->compaction->MinInputFileEpochNumber();
1924
1929
  {
@@ -1968,7 +1973,7 @@ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact,
1968
1973
  cfd->internal_tbl_prop_coll_factories(),
1969
1974
  sub_compact->compaction->output_compression(),
1970
1975
  sub_compact->compaction->output_compression_opts(), cfd->GetID(),
1971
- cfd->GetName(), sub_compact->compaction->output_level(),
1976
+ cfd->GetName(), sub_compact->compaction->output_level(), newest_key_time,
1972
1977
  bottommost_level_, TableFileCreationReason::kCompaction,
1973
1978
  0 /* oldest_key_time */, current_time, db_id_, db_session_id_,
1974
1979
  sub_compact->compaction->max_output_file_size(), file_number,
@@ -2011,7 +2016,8 @@ bool CompactionJob::UpdateCompactionStats(uint64_t* num_input_range_del) {
2011
2016
  for (int input_level = 0;
2012
2017
  input_level < static_cast<int>(compaction->num_input_levels());
2013
2018
  ++input_level) {
2014
- size_t num_input_files = compaction->num_input_files(input_level);
2019
+ const LevelFilesBrief* flevel = compaction->input_levels(input_level);
2020
+ size_t num_input_files = flevel->num_files;
2015
2021
  uint64_t* bytes_read;
2016
2022
  if (compaction->level(input_level) != compaction->output_level()) {
2017
2023
  compaction_stats_.stats.num_input_files_in_non_output_levels +=
@@ -2023,7 +2029,7 @@ bool CompactionJob::UpdateCompactionStats(uint64_t* num_input_range_del) {
2023
2029
  bytes_read = &compaction_stats_.stats.bytes_read_output_level;
2024
2030
  }
2025
2031
  for (size_t i = 0; i < num_input_files; ++i) {
2026
- const FileMetaData* file_meta = compaction->input(input_level, i);
2032
+ const FileMetaData* file_meta = flevel->files[i].file_metadata;
2027
2033
  *bytes_read += file_meta->fd.GetFileSize();
2028
2034
  uint64_t file_input_entries = file_meta->num_entries;
2029
2035
  uint64_t file_num_range_del = file_meta->num_range_deletions;
@@ -2046,6 +2052,23 @@ bool CompactionJob::UpdateCompactionStats(uint64_t* num_input_range_del) {
2046
2052
  *num_input_range_del += file_num_range_del;
2047
2053
  }
2048
2054
  }
2055
+
2056
+ const std::vector<FileMetaData*>& filtered_flevel =
2057
+ compaction->filtered_input_levels(input_level);
2058
+ size_t num_filtered_input_files = filtered_flevel.size();
2059
+ uint64_t* bytes_skipped;
2060
+ if (compaction->level(input_level) != compaction->output_level()) {
2061
+ compaction_stats_.stats.num_filtered_input_files_in_non_output_levels +=
2062
+ static_cast<int>(num_filtered_input_files);
2063
+ bytes_skipped = &compaction_stats_.stats.bytes_skipped_non_output_levels;
2064
+ } else {
2065
+ compaction_stats_.stats.num_filtered_input_files_in_output_level +=
2066
+ static_cast<int>(num_filtered_input_files);
2067
+ bytes_skipped = &compaction_stats_.stats.bytes_skipped_output_level;
2068
+ }
2069
+ for (const FileMetaData* filtered_file_meta : filtered_flevel) {
2070
+ *bytes_skipped += filtered_file_meta->fd.GetFileSize();
2071
+ }
2049
2072
  }
2050
2073
 
2051
2074
  assert(compaction_job_stats_);
@@ -2070,6 +2093,13 @@ void CompactionJob::UpdateCompactionJobStats(
2070
2093
  stats.num_input_files_in_output_level;
2071
2094
  compaction_job_stats_->num_input_files_at_output_level =
2072
2095
  stats.num_input_files_in_output_level;
2096
+ compaction_job_stats_->num_filtered_input_files =
2097
+ stats.num_filtered_input_files_in_non_output_levels +
2098
+ stats.num_filtered_input_files_in_output_level;
2099
+ compaction_job_stats_->num_filtered_input_files_at_output_level =
2100
+ stats.num_filtered_input_files_in_output_level;
2101
+ compaction_job_stats_->total_skipped_input_bytes =
2102
+ stats.bytes_skipped_non_output_levels + stats.bytes_skipped_output_level;
2073
2103
 
2074
2104
  // output information
2075
2105
  compaction_job_stats_->total_output_bytes = stats.bytes_written;
@@ -215,8 +215,7 @@ class CompactionJob {
215
215
  virtual void RecordCompactionIOStats();
216
216
  void CleanupCompaction();
217
217
 
218
- // Call compaction filter. Then iterate through input and compact the
219
- // kv-pairs
218
+ // Iterate through input and compact the kv-pairs.
220
219
  void ProcessKeyValueCompaction(SubcompactionState* sub_compact);
221
220
 
222
221
  CompactionState* compact_;
@@ -386,7 +385,7 @@ struct CompactionServiceInput {
386
385
  // files needed for this compaction, for both input level files and output
387
386
  // level files.
388
387
  std::vector<std::string> input_files;
389
- int output_level;
388
+ int output_level = 0;
390
389
 
391
390
  // db_id is used to generate unique id of sst on the remote compactor
392
391
  std::string db_id;
@@ -397,7 +396,7 @@ struct CompactionServiceInput {
397
396
  bool has_end = false;
398
397
  std::string end;
399
398
 
400
- uint64_t options_file_number;
399
+ uint64_t options_file_number = 0;
401
400
 
402
401
  // serialization interface to read and write the object
403
402
  static Status Read(const std::string& data_str, CompactionServiceInput* obj);
@@ -424,6 +423,7 @@ struct CompactionServiceOutputFile {
424
423
  uint64_t paranoid_hash;
425
424
  bool marked_for_compaction;
426
425
  UniqueId64x2 unique_id{};
426
+ TableProperties table_properties;
427
427
 
428
428
  CompactionServiceOutputFile() = default;
429
429
  CompactionServiceOutputFile(
@@ -432,7 +432,8 @@ struct CompactionServiceOutputFile {
432
432
  uint64_t _oldest_ancester_time, uint64_t _file_creation_time,
433
433
  uint64_t _epoch_number, const std::string& _file_checksum,
434
434
  const std::string& _file_checksum_func_name, uint64_t _paranoid_hash,
435
- bool _marked_for_compaction, UniqueId64x2 _unique_id)
435
+ bool _marked_for_compaction, UniqueId64x2 _unique_id,
436
+ const TableProperties& _table_properties)
436
437
  : file_name(name),
437
438
  smallest_seqno(smallest),
438
439
  largest_seqno(largest),
@@ -445,7 +446,8 @@ struct CompactionServiceOutputFile {
445
446
  file_checksum_func_name(_file_checksum_func_name),
446
447
  paranoid_hash(_paranoid_hash),
447
448
  marked_for_compaction(_marked_for_compaction),
448
- unique_id(std::move(_unique_id)) {}
449
+ unique_id(std::move(_unique_id)),
450
+ table_properties(_table_properties) {}
449
451
  };
450
452
 
451
453
  // CompactionServiceResult contains the compaction result from a different db
@@ -454,7 +456,7 @@ struct CompactionServiceOutputFile {
454
456
  struct CompactionServiceResult {
455
457
  Status status;
456
458
  std::vector<CompactionServiceOutputFile> output_files;
457
- int output_level;
459
+ int output_level = 0;
458
460
 
459
461
  // location of the output files
460
462
  std::string output_path;
@@ -250,6 +250,7 @@ class CompactionJobTestBase : public testing::Test {
250
250
  } else {
251
251
  assert(false);
252
252
  }
253
+ mutable_cf_options_.table_factory = cf_options_.table_factory;
253
254
  }
254
255
 
255
256
  std::string GenerateFileName(uint64_t file_number) {
@@ -300,13 +301,13 @@ class CompactionJobTestBase : public testing::Test {
300
301
  const WriteOptions write_options;
301
302
  std::unique_ptr<TableBuilder> table_builder(
302
303
  cf_options_.table_factory->NewTableBuilder(
303
- TableBuilderOptions(*cfd_->ioptions(), mutable_cf_options_,
304
- read_options, write_options,
305
- cfd_->internal_comparator(),
306
- cfd_->internal_tbl_prop_coll_factories(),
307
- CompressionType::kNoCompression,
308
- CompressionOptions(), 0 /* column_family_id */,
309
- kDefaultColumnFamilyName, -1 /* level */),
304
+ TableBuilderOptions(
305
+ *cfd_->ioptions(), mutable_cf_options_, read_options,
306
+ write_options, cfd_->internal_comparator(),
307
+ cfd_->internal_tbl_prop_coll_factories(),
308
+ CompressionType::kNoCompression, CompressionOptions(),
309
+ 0 /* column_family_id */, kDefaultColumnFamilyName,
310
+ -1 /* level */, kUnknownNewestKeyTime),
310
311
  file_writer.get()));
311
312
  // Build table.
312
313
  for (const auto& kv : contents) {
@@ -651,7 +652,8 @@ class CompactionJobTestBase : public testing::Test {
651
652
  mutable_cf_options_.target_file_size_base,
652
653
  mutable_cf_options_.max_compaction_bytes, 0, kNoCompression,
653
654
  cfd->GetLatestMutableCFOptions()->compression_opts,
654
- Temperature::kUnknown, max_subcompactions, grandparents, true);
655
+ Temperature::kUnknown, max_subcompactions, grandparents,
656
+ /*earliest_snapshot*/ std::nullopt, /*snapshot_checker*/ nullptr, true);
655
657
  compaction.FinalizeInputInfo(cfd->current());
656
658
 
657
659
  assert(db_options_.info_log);
@@ -1660,6 +1662,16 @@ TEST_F(CompactionJobTest, ResultSerialization) {
1660
1662
  std::string file_checksum = rnd.RandomBinaryString(rnd.Uniform(kStrMaxLen));
1661
1663
  std::string file_checksum_func_name = "MyAwesomeChecksumGenerator";
1662
1664
  while (!rnd.OneIn(10)) {
1665
+ TableProperties tp;
1666
+ tp.user_collected_properties.emplace(
1667
+ "UCP_Key1", rnd.RandomString(rnd.Uniform(kStrMaxLen)));
1668
+ tp.user_collected_properties.emplace(
1669
+ "UCP_Key2", rnd.RandomString(rnd.Uniform(kStrMaxLen)));
1670
+ tp.readable_properties.emplace("RP_Key1",
1671
+ rnd.RandomString(rnd.Uniform(kStrMaxLen)));
1672
+ tp.readable_properties.emplace("RP_K2y2",
1673
+ rnd.RandomString(rnd.Uniform(kStrMaxLen)));
1674
+
1663
1675
  UniqueId64x2 id{rnd64.Uniform(UINT64_MAX), rnd64.Uniform(UINT64_MAX)};
1664
1676
  result.output_files.emplace_back(
1665
1677
  rnd.RandomString(rnd.Uniform(kStrMaxLen)) /* file_name */,
@@ -1675,7 +1687,7 @@ TEST_F(CompactionJobTest, ResultSerialization) {
1675
1687
  file_checksum /* file_checksum */,
1676
1688
  file_checksum_func_name /* file_checksum_func_name */,
1677
1689
  rnd64.Uniform(UINT64_MAX) /* paranoid_hash */,
1678
- rnd.OneIn(2) /* marked_for_compaction */, id);
1690
+ rnd.OneIn(2) /* marked_for_compaction */, id /* unique_id */, tp);
1679
1691
  }
1680
1692
  result.output_level = rnd.Uniform(10);
1681
1693
  result.output_path = rnd.RandomString(rnd.Uniform(kStrMaxLen));
@@ -1696,6 +1708,21 @@ TEST_F(CompactionJobTest, ResultSerialization) {
1696
1708
  ASSERT_OK(CompactionServiceResult::Read(output, &deserialized1));
1697
1709
  ASSERT_TRUE(deserialized1.TEST_Equals(&result));
1698
1710
 
1711
+ for (size_t i = 0; i < result.output_files.size(); i++) {
1712
+ for (const auto& prop :
1713
+ result.output_files[i].table_properties.user_collected_properties) {
1714
+ ASSERT_EQ(deserialized1.output_files[i]
1715
+ .table_properties.user_collected_properties[prop.first],
1716
+ prop.second);
1717
+ }
1718
+ for (const auto& prop :
1719
+ result.output_files[i].table_properties.readable_properties) {
1720
+ ASSERT_EQ(deserialized1.output_files[i]
1721
+ .table_properties.readable_properties[prop.first],
1722
+ prop.second);
1723
+ }
1724
+ }
1725
+
1699
1726
  // Test mismatch
1700
1727
  deserialized1.stats.num_input_files += 10;
1701
1728
  std::string mismatch;
@@ -108,6 +108,12 @@ class CompactionOutputs {
108
108
  Status Finish(const Status& intput_status,
109
109
  const SeqnoToTimeMapping& seqno_to_time_mapping);
110
110
 
111
+ // Update output table properties from already populated TableProperties.
112
+ // Used for remote compaction
113
+ void UpdateTableProperties(const TableProperties& table_properties) {
114
+ current_output().table_properties =
115
+ std::make_shared<TableProperties>(table_properties);
116
+ }
111
117
  // Update output table properties from table builder
112
118
  void UpdateTableProperties() {
113
119
  current_output().table_properties =
@@ -351,11 +351,11 @@ Compaction* CompactionPicker::CompactFiles(
351
351
  break;
352
352
  }
353
353
  }
354
- assert(output_level == 0 ||
355
- !FilesRangeOverlapWithCompaction(
356
- input_files, output_level,
357
- Compaction::EvaluatePenultimateLevel(vstorage, ioptions_,
358
- start_level, output_level)));
354
+ assert(output_level == 0 || !FilesRangeOverlapWithCompaction(
355
+ input_files, output_level,
356
+ Compaction::EvaluatePenultimateLevel(
357
+ vstorage, mutable_cf_options, ioptions_,
358
+ start_level, output_level)));
359
359
  #endif /* !NDEBUG */
360
360
 
361
361
  CompressionType compression_type;
@@ -380,7 +380,8 @@ Compaction* CompactionPicker::CompactFiles(
380
380
  GetCompressionOptions(mutable_cf_options, vstorage, output_level),
381
381
  mutable_cf_options.default_write_temperature,
382
382
  compact_options.max_subcompactions,
383
- /* grandparents */ {}, true);
383
+ /* grandparents */ {}, /* earliest_snapshot */ std::nullopt,
384
+ /* snapshot_checker */ nullptr, true);
384
385
  RegisterCompaction(c);
385
386
  return c;
386
387
  }
@@ -658,8 +659,9 @@ Compaction* CompactionPicker::CompactRange(
658
659
  // overlapping outputs in the same level.
659
660
  if (FilesRangeOverlapWithCompaction(
660
661
  inputs, output_level,
661
- Compaction::EvaluatePenultimateLevel(vstorage, ioptions_,
662
- start_level, output_level))) {
662
+ Compaction::EvaluatePenultimateLevel(vstorage, mutable_cf_options,
663
+ ioptions_, start_level,
664
+ output_level))) {
663
665
  // This compaction output could potentially conflict with the output
664
666
  // of a currently running compaction, we cannot run it.
665
667
  *manual_conflict = true;
@@ -677,7 +679,9 @@ Compaction* CompactionPicker::CompactRange(
677
679
  GetCompressionOptions(mutable_cf_options, vstorage, output_level),
678
680
  mutable_cf_options.default_write_temperature,
679
681
  compact_range_options.max_subcompactions,
680
- /* grandparents */ {}, /* is manual */ true, trim_ts, /* score */ -1,
682
+ /* grandparents */ {}, /* earliest_snapshot */ std::nullopt,
683
+ /* snapshot_checker */ nullptr,
684
+ /* is manual */ true, trim_ts, /* score */ -1,
681
685
  /* deletion_compaction */ false, /* l0_files_might_overlap */ true,
682
686
  CompactionReason::kUnknown,
683
687
  compact_range_options.blob_garbage_collection_policy,
@@ -843,7 +847,8 @@ Compaction* CompactionPicker::CompactRange(
843
847
  // overlaping outputs in the same level.
844
848
  if (FilesRangeOverlapWithCompaction(
845
849
  compaction_inputs, output_level,
846
- Compaction::EvaluatePenultimateLevel(vstorage, ioptions_, input_level,
850
+ Compaction::EvaluatePenultimateLevel(vstorage, mutable_cf_options,
851
+ ioptions_, input_level,
847
852
  output_level))) {
848
853
  // This compaction output could potentially conflict with the output
849
854
  // of a currently running compaction, we cannot run it.
@@ -866,6 +871,7 @@ Compaction* CompactionPicker::CompactRange(
866
871
  GetCompressionOptions(mutable_cf_options, vstorage, output_level),
867
872
  mutable_cf_options.default_write_temperature,
868
873
  compact_range_options.max_subcompactions, std::move(grandparents),
874
+ /* earliest_snapshot */ std::nullopt, /* snapshot_checker */ nullptr,
869
875
  /* is manual */ true, trim_ts, /* score */ -1,
870
876
  /* deletion_compaction */ false, /* l0_files_might_overlap */ true,
871
877
  CompactionReason::kUnknown,
@@ -1045,10 +1051,12 @@ Status CompactionPicker::SanitizeCompactionInputFilesForAllLevels(
1045
1051
  }
1046
1052
 
1047
1053
  Status CompactionPicker::SanitizeAndConvertCompactionInputFiles(
1048
- std::unordered_set<uint64_t>* input_files,
1049
- const ColumnFamilyMetaData& cf_meta, const int output_level,
1050
- const VersionStorageInfo* vstorage,
1054
+ std::unordered_set<uint64_t>* input_files, const int output_level,
1055
+ Version* version,
1051
1056
  std::vector<CompactionInputFiles>* converted_input_files) const {
1057
+ ColumnFamilyMetaData cf_meta;
1058
+ version->GetColumnFamilyMetaData(&cf_meta);
1059
+
1052
1060
  assert(static_cast<int>(cf_meta.levels.size()) - 1 ==
1053
1061
  cf_meta.levels[cf_meta.levels.size() - 1].level);
1054
1062
  assert(converted_input_files);
@@ -1119,7 +1127,8 @@ Status CompactionPicker::SanitizeAndConvertCompactionInputFiles(
1119
1127
  }
1120
1128
 
1121
1129
  s = GetCompactionInputsFromFileNumbers(converted_input_files, input_files,
1122
- vstorage, CompactionOptions());
1130
+ version->storage_info(),
1131
+ CompactionOptions());
1123
1132
  if (!s.ok()) {
1124
1133
  return s;
1125
1134
  }
@@ -1128,8 +1137,8 @@ Status CompactionPicker::SanitizeAndConvertCompactionInputFiles(
1128
1137
  FilesRangeOverlapWithCompaction(
1129
1138
  *converted_input_files, output_level,
1130
1139
  Compaction::EvaluatePenultimateLevel(
1131
- vstorage, ioptions_, (*converted_input_files)[0].level,
1132
- output_level))) {
1140
+ version->storage_info(), version->GetMutableCFOptions(),
1141
+ ioptions_, (*converted_input_files)[0].level, output_level))) {
1133
1142
  return Status::Aborted(
1134
1143
  "A running compaction is writing to the same output level(s) in an "
1135
1144
  "overlapping key range");
@@ -1171,7 +1180,8 @@ void CompactionPicker::UnregisterCompaction(Compaction* c) {
1171
1180
 
1172
1181
  void CompactionPicker::PickFilesMarkedForCompaction(
1173
1182
  const std::string& cf_name, VersionStorageInfo* vstorage, int* start_level,
1174
- int* output_level, CompactionInputFiles* start_level_inputs) {
1183
+ int* output_level, CompactionInputFiles* start_level_inputs,
1184
+ std::function<bool(const FileMetaData*)> skip_marked_file) {
1175
1185
  if (vstorage->FilesMarkedForCompaction().empty()) {
1176
1186
  return;
1177
1187
  }
@@ -1181,6 +1191,9 @@ void CompactionPicker::PickFilesMarkedForCompaction(
1181
1191
  // If this assert() fails that means that some function marked some
1182
1192
  // files as being_compacted, but didn't call ComputeCompactionScore()
1183
1193
  assert(!level_file.second->being_compacted);
1194
+ if (skip_marked_file(level_file.second)) {
1195
+ return false;
1196
+ }
1184
1197
  *start_level = level_file.first;
1185
1198
  *output_level =
1186
1199
  (*start_level == 0) ? vstorage->base_level() : *start_level + 1;