rocksdb-native 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. package/binding.c +92 -10
  2. package/index.js +9 -0
  3. package/lib/batch.js +11 -1
  4. package/lib/iterator.js +3 -1
  5. package/lib/snapshot.js +21 -0
  6. package/package.json +1 -1
  7. package/prebuilds/darwin-arm64/rocksdb-native.bare +0 -0
  8. package/prebuilds/darwin-arm64/rocksdb-native.node +0 -0
  9. package/prebuilds/darwin-x64/rocksdb-native.bare +0 -0
  10. package/prebuilds/darwin-x64/rocksdb-native.node +0 -0
  11. package/prebuilds/linux-arm64/rocksdb-native.bare +0 -0
  12. package/prebuilds/linux-arm64/rocksdb-native.node +0 -0
  13. package/prebuilds/linux-x64/rocksdb-native.bare +0 -0
  14. package/prebuilds/linux-x64/rocksdb-native.node +0 -0
  15. package/prebuilds/win32-x64/rocksdb-native.bare +0 -0
  16. package/prebuilds/win32-x64/rocksdb-native.node +0 -0
  17. package/vendor/librocksdb/include/rocksdb.h +38 -4
  18. package/vendor/librocksdb/src/rocksdb.cc +114 -14
  19. package/vendor/librocksdb/vendor/rocksdb/CMakeLists.txt +21 -4
  20. package/vendor/librocksdb/vendor/rocksdb/cache/secondary_cache_adapter.cc +6 -3
  21. package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.cc +4 -4
  22. package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.h +4 -2
  23. package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.cc +20 -0
  24. package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.h +83 -0
  25. package/vendor/librocksdb/vendor/rocksdb/db/builder.cc +9 -5
  26. package/vendor/librocksdb/vendor/rocksdb/db/builder.h +1 -1
  27. package/vendor/librocksdb/vendor/rocksdb/db/c.cc +231 -6
  28. package/vendor/librocksdb/vendor/rocksdb/db/c_test.c +202 -2
  29. package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.cc +47 -0
  30. package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.h +79 -0
  31. package/vendor/librocksdb/vendor/rocksdb/db/column_family.cc +28 -0
  32. package/vendor/librocksdb/vendor/rocksdb/db/column_family.h +17 -0
  33. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.cc +8 -1
  34. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.h +11 -9
  35. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.cc +50 -23
  36. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.h +13 -0
  37. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.cc +22 -25
  38. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.h +2 -0
  39. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.cc +8 -1
  40. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.h +1 -0
  41. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.cc +40 -17
  42. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.h +20 -14
  43. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_level.cc +11 -6
  44. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_universal.cc +77 -24
  45. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_service_job.cc +2 -0
  46. package/vendor/librocksdb/vendor/rocksdb/db/convenience.cc +3 -0
  47. package/vendor/librocksdb/vendor/rocksdb/db/db_filesnapshot.cc +125 -31
  48. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.cc +457 -231
  49. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.h +172 -73
  50. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_compaction_flush.cc +152 -133
  51. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  52. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_files.cc +58 -52
  53. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.cc +348 -0
  54. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.h +54 -0
  55. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_open.cc +136 -117
  56. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.cc +4 -3
  57. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.h +7 -6
  58. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_write.cc +134 -80
  59. package/vendor/librocksdb/vendor/rocksdb/db/db_iter.cc +11 -0
  60. package/vendor/librocksdb/vendor/rocksdb/db/db_test2.cc +1 -1
  61. package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.cc +11 -1
  62. package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.h +11 -7
  63. package/vendor/librocksdb/vendor/rocksdb/db/dbformat.cc +19 -4
  64. package/vendor/librocksdb/vendor/rocksdb/db/dbformat.h +3 -2
  65. package/vendor/librocksdb/vendor/rocksdb/db/error_handler.cc +34 -39
  66. package/vendor/librocksdb/vendor/rocksdb/db/error_handler.h +3 -4
  67. package/vendor/librocksdb/vendor/rocksdb/db/event_helpers.cc +6 -3
  68. package/vendor/librocksdb/vendor/rocksdb/db/experimental.cc +3 -2
  69. package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.cc +76 -18
  70. package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.h +11 -0
  71. package/vendor/librocksdb/vendor/rocksdb/db/flush_job.cc +37 -5
  72. package/vendor/librocksdb/vendor/rocksdb/db/flush_job.h +14 -0
  73. package/vendor/librocksdb/vendor/rocksdb/db/import_column_family_job.cc +49 -45
  74. package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.cc +60 -1
  75. package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.h +20 -1
  76. package/vendor/librocksdb/vendor/rocksdb/db/log_reader.cc +15 -6
  77. package/vendor/librocksdb/vendor/rocksdb/db/log_writer.cc +59 -10
  78. package/vendor/librocksdb/vendor/rocksdb/db/log_writer.h +8 -0
  79. package/vendor/librocksdb/vendor/rocksdb/db/memtable.cc +24 -40
  80. package/vendor/librocksdb/vendor/rocksdb/db/memtable.h +10 -10
  81. package/vendor/librocksdb/vendor/rocksdb/db/memtable_list.cc +9 -8
  82. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator_impl.h +296 -0
  83. package/vendor/librocksdb/vendor/rocksdb/db/range_tombstone_fragmenter.h +8 -10
  84. package/vendor/librocksdb/vendor/rocksdb/db/repair.cc +4 -3
  85. package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.cc +30 -0
  86. package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.h +9 -0
  87. package/vendor/librocksdb/vendor/rocksdb/db/table_cache.cc +17 -2
  88. package/vendor/librocksdb/vendor/rocksdb/db/table_cache.h +9 -1
  89. package/vendor/librocksdb/vendor/rocksdb/db/table_properties_collector.h +9 -2
  90. package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.cc +3 -3
  91. package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.h +7 -7
  92. package/vendor/librocksdb/vendor/rocksdb/db/version_edit.cc +0 -1
  93. package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.cc +39 -5
  94. package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.h +24 -15
  95. package/vendor/librocksdb/vendor/rocksdb/db/version_set.cc +117 -64
  96. package/vendor/librocksdb/vendor/rocksdb/db/version_set.h +27 -10
  97. package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.cc +37 -29
  98. package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.h +6 -5
  99. package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns.cc +2 -3
  100. package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns_helper.cc +6 -0
  101. package/vendor/librocksdb/vendor/rocksdb/db/write_batch.cc +89 -31
  102. package/vendor/librocksdb/vendor/rocksdb/db/write_thread.cc +53 -5
  103. package/vendor/librocksdb/vendor/rocksdb/db/write_thread.h +36 -4
  104. package/vendor/librocksdb/vendor/rocksdb/env/composite_env_wrapper.h +21 -0
  105. package/vendor/librocksdb/vendor/rocksdb/env/env.cc +15 -0
  106. package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.cc +331 -0
  107. package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.h +139 -0
  108. package/vendor/librocksdb/vendor/rocksdb/env/io_posix.cc +8 -6
  109. package/vendor/librocksdb/vendor/rocksdb/env/io_posix.h +1 -1
  110. package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.cc +130 -27
  111. package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.h +61 -8
  112. package/vendor/librocksdb/vendor/rocksdb/file/file_util.cc +25 -4
  113. package/vendor/librocksdb/vendor/rocksdb/file/file_util.h +15 -0
  114. package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.cc +1 -0
  115. package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.h +9 -4
  116. package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.cc +18 -0
  117. package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.h +31 -4
  118. package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.cc +40 -38
  119. package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.h +48 -15
  120. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/advanced_options.h +12 -3
  121. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/attribute_groups.h +114 -0
  122. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/c.h +90 -0
  123. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/cache.h +5 -0
  124. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/comparator.h +27 -0
  125. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/db.h +71 -12
  126. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/env.h +9 -0
  127. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/experimental.h +5 -0
  128. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/file_system.h +14 -0
  129. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator.h +9 -71
  130. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator_base.h +90 -0
  131. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/listener.h +21 -0
  132. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/options.h +125 -12
  133. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/perf_context.h +1 -1
  134. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/sst_file_reader.h +11 -1
  135. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table.h +6 -6
  136. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table_properties.h +19 -0
  137. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/transaction_log.h +12 -6
  138. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/types.h +12 -0
  139. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/universal_compaction.h +31 -0
  140. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/user_write_callback.h +29 -0
  141. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/cache_dump_load.h +4 -0
  142. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/checkpoint.h +4 -2
  143. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/customizable_util.h +0 -1
  144. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/env_mirror.h +1 -1
  145. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -7
  146. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/option_change_migration.h +4 -4
  147. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/stackable_db.h +24 -5
  148. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +46 -0
  149. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction.h +42 -17
  150. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction_db.h +5 -0
  151. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/types_util.h +36 -0
  152. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +71 -3
  153. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/version.h +2 -2
  154. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/wide_columns.h +87 -72
  155. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/write_batch_base.h +1 -1
  156. package/vendor/librocksdb/vendor/rocksdb/memory/memory_allocator.cc +1 -0
  157. package/vendor/librocksdb/vendor/rocksdb/options/cf_options.cc +13 -2
  158. package/vendor/librocksdb/vendor/rocksdb/options/cf_options.h +6 -2
  159. package/vendor/librocksdb/vendor/rocksdb/options/db_options.cc +27 -1
  160. package/vendor/librocksdb/vendor/rocksdb/options/db_options.h +10 -3
  161. package/vendor/librocksdb/vendor/rocksdb/options/options.cc +3 -0
  162. package/vendor/librocksdb/vendor/rocksdb/options/options_helper.cc +1 -0
  163. package/vendor/librocksdb/vendor/rocksdb/port/jemalloc_helper.h +2 -2
  164. package/vendor/librocksdb/vendor/rocksdb/port/stack_trace.cc +1 -0
  165. package/vendor/librocksdb/vendor/rocksdb/port/win/port_win.cc +3 -2
  166. package/vendor/librocksdb/vendor/rocksdb/table/block_based/binary_search_index_reader.cc +1 -2
  167. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_builder.cc +47 -31
  168. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_factory.cc +15 -0
  169. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.cc +37 -18
  170. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.h +10 -3
  171. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.cc +102 -41
  172. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.h +15 -7
  173. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -3
  174. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -6
  175. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_cache.h +31 -0
  176. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_prefetcher.cc +6 -0
  177. package/vendor/librocksdb/vendor/rocksdb/table/block_based/cachable_entry.h +10 -5
  178. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block.h +34 -28
  179. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.cc +17 -11
  180. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.h +5 -2
  181. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_policy.cc +12 -3
  182. package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.cc +37 -30
  183. package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.h +11 -13
  184. package/vendor/librocksdb/vendor/rocksdb/table/block_based/hash_index_reader.cc +1 -2
  185. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.cc +62 -53
  186. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.h +60 -38
  187. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.cc +14 -9
  188. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.h +4 -1
  189. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.cc +135 -94
  190. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.h +52 -46
  191. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.cc +51 -13
  192. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.h +2 -0
  193. package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.cc +3 -11
  194. package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.h +2 -3
  195. package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.cc +8 -10
  196. package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.h +2 -1
  197. package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.cc +9 -10
  198. package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.h +3 -2
  199. package/vendor/librocksdb/vendor/rocksdb/table/format.cc +1 -2
  200. package/vendor/librocksdb/vendor/rocksdb/table/iterator.cc +4 -0
  201. package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.cc +18 -13
  202. package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.h +5 -3
  203. package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.cc +18 -4
  204. package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.h +4 -0
  205. package/vendor/librocksdb/vendor/rocksdb/table/plain/plain_table_builder.cc +2 -2
  206. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_dumper.cc +6 -6
  207. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_reader.cc +24 -2
  208. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_writer_collectors.h +3 -1
  209. package/vendor/librocksdb/vendor/rocksdb/table/table_builder.h +8 -7
  210. package/vendor/librocksdb/vendor/rocksdb/table/table_iterator.h +69 -0
  211. package/vendor/librocksdb/vendor/rocksdb/table/table_reader.h +9 -0
  212. package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.cc +25 -0
  213. package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.h +12 -0
  214. package/vendor/librocksdb/vendor/rocksdb/tools/db_bench_tool.cc +32 -0
  215. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd.cc +618 -124
  216. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd_impl.h +19 -1
  217. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_tool.cc +9 -0
  218. package/vendor/librocksdb/vendor/rocksdb/util/aligned_storage.h +24 -0
  219. package/vendor/librocksdb/vendor/rocksdb/util/autovector.h +4 -0
  220. package/vendor/librocksdb/vendor/rocksdb/util/comparator.cc +12 -0
  221. package/vendor/librocksdb/vendor/rocksdb/util/filter_bench.cc +1 -1
  222. package/vendor/librocksdb/vendor/rocksdb/util/random.cc +2 -1
  223. package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.cc +3 -4
  224. package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.h +1 -1
  225. package/vendor/librocksdb/vendor/rocksdb/util/udt_util.cc +33 -0
  226. package/vendor/librocksdb/vendor/rocksdb/util/udt_util.h +7 -0
  227. package/vendor/librocksdb/vendor/rocksdb/util/write_batch_util.h +5 -0
  228. package/vendor/librocksdb/vendor/rocksdb/util/xxhash.h +36 -29
  229. package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl.h +3 -0
  230. package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +20 -0
  231. package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.cc +29 -9
  232. package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.h +14 -3
  233. package/vendor/librocksdb/vendor/rocksdb/utilities/debug.cc +16 -4
  234. package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.cc +677 -248
  235. package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.h +325 -158
  236. package/vendor/librocksdb/vendor/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -8
  237. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +144 -0
  238. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +45 -0
  239. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +12 -0
  240. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +1 -1
  241. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h +3 -3
  242. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.cc +116 -20
  243. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.h +33 -1
  244. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +78 -13
  245. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.h +33 -1
  246. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.cc +106 -7
  247. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.h +68 -10
  248. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_test.h +7 -3
  249. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.cc +8 -5
  250. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.h +7 -4
  251. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -12
  252. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.cc +4 -4
  253. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.h +17 -0
  254. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn.cc +11 -9
  255. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +2 -1
  256. package/vendor/librocksdb/vendor/rocksdb/utilities/types_util.cc +88 -0
  257. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +313 -14
  258. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +7 -0
  259. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +1 -1
  260. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.cc +0 -102
  261. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.h +0 -159
@@ -25,6 +25,7 @@ namespace ROCKSDB_NAMESPACE {
25
25
  CompactionIterator::CompactionIterator(
26
26
  InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
27
27
  SequenceNumber last_sequence, std::vector<SequenceNumber>* snapshots,
28
+ SequenceNumber earliest_snapshot,
28
29
  SequenceNumber earliest_write_conflict_snapshot,
29
30
  SequenceNumber job_snapshot, const SnapshotChecker* snapshot_checker,
30
31
  Env* env, bool report_detailed_time, bool expect_valid_internal_key,
@@ -40,7 +41,7 @@ CompactionIterator::CompactionIterator(
40
41
  const SequenceNumber preserve_time_min_seqno,
41
42
  const SequenceNumber preclude_last_level_min_seqno)
42
43
  : CompactionIterator(
43
- input, cmp, merge_helper, last_sequence, snapshots,
44
+ input, cmp, merge_helper, last_sequence, snapshots, earliest_snapshot,
44
45
  earliest_write_conflict_snapshot, job_snapshot, snapshot_checker, env,
45
46
  report_detailed_time, expect_valid_internal_key, range_del_agg,
46
47
  blob_file_builder, allow_data_in_errors, enforce_single_del_contracts,
@@ -54,6 +55,7 @@ CompactionIterator::CompactionIterator(
54
55
  CompactionIterator::CompactionIterator(
55
56
  InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
56
57
  SequenceNumber /*last_sequence*/, std::vector<SequenceNumber>* snapshots,
58
+ SequenceNumber earliest_snapshot,
57
59
  SequenceNumber earliest_write_conflict_snapshot,
58
60
  SequenceNumber job_snapshot, const SnapshotChecker* snapshot_checker,
59
61
  Env* env, bool report_detailed_time, bool expect_valid_internal_key,
@@ -91,9 +93,7 @@ CompactionIterator::CompactionIterator(
91
93
  // snapshots_ cannot be nullptr, but we will assert later in the body of
92
94
  // the constructor.
93
95
  visible_at_tip_(snapshots_ ? snapshots_->empty() : false),
94
- earliest_snapshot_(!snapshots_ || snapshots_->empty()
95
- ? kMaxSequenceNumber
96
- : snapshots_->at(0)),
96
+ earliest_snapshot_(earliest_snapshot),
97
97
  info_log_(info_log),
98
98
  allow_data_in_errors_(allow_data_in_errors),
99
99
  enforce_single_del_contracts_(enforce_single_del_contracts),
@@ -997,25 +997,37 @@ void CompactionIterator::NextFromInput() {
997
997
  // A special case involving range deletion is handled separately below.
998
998
  auto [unpacked_value, preferred_seqno] =
999
999
  ParsePackedValueWithSeqno(value_);
1000
- assert(preferred_seqno < ikey_.sequence);
1001
- InternalKey ikey_after_swap(ikey_.user_key, preferred_seqno, kTypeValue);
1002
- Slice ikey_after_swap_slice(*ikey_after_swap.rep());
1000
+ assert(preferred_seqno < ikey_.sequence || ikey_.sequence == 0);
1003
1001
  if (range_del_agg_->ShouldDelete(
1004
- ikey_after_swap_slice,
1005
- RangeDelPositioningMode::kForwardTraversal)) {
1006
- // A range tombstone that doesn't cover this kTypeValuePreferredSeqno
1007
- // entry may end up covering the entry, so it's not safe to swap
1008
- // preferred sequence number. In this case, we output the entry as is.
1009
- validity_info_.SetValid(ValidContext::kNewUserKey);
1002
+ key_, RangeDelPositioningMode::kForwardTraversal)) {
1003
+ ++iter_stats_.num_record_drop_hidden;
1004
+ ++iter_stats_.num_record_drop_range_del;
1005
+ AdvanceInputIter();
1010
1006
  } else {
1011
- iter_stats_.num_timed_put_swap_preferred_seqno++;
1012
- ikey_.sequence = preferred_seqno;
1013
- ikey_.type = kTypeValue;
1014
- current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type);
1015
- key_ = current_key_.GetInternalKey();
1016
- ikey_.user_key = current_key_.GetUserKey();
1017
- value_ = unpacked_value;
1018
- validity_info_.SetValid(ValidContext::kSwapPreferredSeqno);
1007
+ InternalKey ikey_after_swap(ikey_.user_key,
1008
+ std::min(preferred_seqno, ikey_.sequence),
1009
+ kTypeValue);
1010
+ Slice ikey_after_swap_slice(*ikey_after_swap.rep());
1011
+ if (range_del_agg_->ShouldDelete(
1012
+ ikey_after_swap_slice,
1013
+ RangeDelPositioningMode::kForwardTraversal)) {
1014
+ // A range tombstone that doesn't cover this kTypeValuePreferredSeqno
1015
+ // entry will end up covering the entry, so it's not safe to swap
1016
+ // preferred sequence number. In this case, we output the entry as is.
1017
+ validity_info_.SetValid(ValidContext::kNewUserKey);
1018
+ } else {
1019
+ if (ikey_.sequence != 0) {
1020
+ iter_stats_.num_timed_put_swap_preferred_seqno++;
1021
+ saved_seq_for_penul_check_ = ikey_.sequence;
1022
+ ikey_.sequence = preferred_seqno;
1023
+ }
1024
+ ikey_.type = kTypeValue;
1025
+ current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type);
1026
+ key_ = current_key_.GetInternalKey();
1027
+ ikey_.user_key = current_key_.GetUserKey();
1028
+ value_ = unpacked_value;
1029
+ validity_info_.SetValid(ValidContext::kSwapPreferredSeqno);
1030
+ }
1019
1031
  }
1020
1032
  } else if (ikey_.type == kTypeMerge) {
1021
1033
  if (!merge_helper_->HasOperator()) {
@@ -1268,6 +1280,21 @@ void CompactionIterator::DecideOutputLevel() {
1268
1280
  }
1269
1281
  #endif // NDEBUG
1270
1282
 
1283
+ // saved_seq_for_penul_check_ is populated in `NextFromInput` when the
1284
+ // entry's sequence number is non-zero and the validity context for outputting this
1285
+ // entry is kSwapPreferredSeqno for use in `DecideOutputLevel`. It should be
1286
+ // cleared out here unconditionally. Otherwise, it may end up getting consumed
1287
+ // incorrectly by a different entry.
1288
+ SequenceNumber seq_for_range_check =
1289
+ (saved_seq_for_penul_check_.has_value() &&
1290
+ saved_seq_for_penul_check_.value() != kMaxSequenceNumber)
1291
+ ? saved_seq_for_penul_check_.value()
1292
+ : ikey_.sequence;
1293
+ saved_seq_for_penul_check_ = std::nullopt;
1294
+ ParsedInternalKey ikey_for_range_check = ikey_;
1295
+ if (seq_for_range_check != ikey_.sequence) {
1296
+ ikey_for_range_check.sequence = seq_for_range_check;
1297
+ }
1271
1298
  if (output_to_penultimate_level_) {
1272
1299
  // If it's decided to output to the penultimate level, but unsafe to do so,
1273
1300
  // still output to the last level. For example, moving the data from a lower
@@ -1276,7 +1303,7 @@ void CompactionIterator::DecideOutputLevel() {
1276
1303
  // not from this compaction.
1277
1304
  // TODO: add statistic for declined output_to_penultimate_level
1278
1305
  bool safe_to_penultimate_level =
1279
- compaction_->WithinPenultimateLevelOutputRange(ikey_);
1306
+ compaction_->WithinPenultimateLevelOutputRange(ikey_for_range_check);
1280
1307
  if (!safe_to_penultimate_level) {
1281
1308
  output_to_penultimate_level_ = false;
1282
1309
  // It could happen when disable/enable `last_level_temperature` while
@@ -1288,7 +1315,7 @@ void CompactionIterator::DecideOutputLevel() {
1288
1315
  // snapshot is released before enabling `last_level_temperature` feature
1289
1316
  // We will migrate the feature to `last_level_temperature` and maybe make
1290
1317
  // it not dynamically changeable.
1291
- if (ikey_.sequence > earliest_snapshot_) {
1318
+ if (seq_for_range_check > earliest_snapshot_) {
1292
1319
  status_ = Status::Corruption(
1293
1320
  "Unsafe to store Seq later than snapshot in the last level if "
1294
1321
  "per_key_placement is enabled");
@@ -203,6 +203,7 @@ class CompactionIterator {
203
203
  CompactionIterator(
204
204
  InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
205
205
  SequenceNumber last_sequence, std::vector<SequenceNumber>* snapshots,
206
+ SequenceNumber earliest_snapshot,
206
207
  SequenceNumber earliest_write_conflict_snapshot,
207
208
  SequenceNumber job_snapshot, const SnapshotChecker* snapshot_checker,
208
209
  Env* env, bool report_detailed_time, bool expect_valid_internal_key,
@@ -222,6 +223,7 @@ class CompactionIterator {
222
223
  CompactionIterator(
223
224
  InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
224
225
  SequenceNumber last_sequence, std::vector<SequenceNumber>* snapshots,
226
+ SequenceNumber earliest_snapshot,
225
227
  SequenceNumber earliest_write_conflict_snapshot,
226
228
  SequenceNumber job_snapshot, const SnapshotChecker* snapshot_checker,
227
229
  Env* env, bool report_detailed_time, bool expect_valid_internal_key,
@@ -437,6 +439,17 @@ class CompactionIterator {
437
439
  // iterator output (or current key in the underlying iterator during
438
440
  // NextFromInput()).
439
441
  ParsedInternalKey ikey_;
442
+
443
+ // When a kTypeValuePreferredSeqno entry's preferred seqno is safely swapped
444
+ // in in this compaction, this field saves its original sequence number for
445
+ // range checking whether it's safe to be placed on the penultimate level.
446
+ // This is to ensure when such an entry happens to be the right boundary of
447
+ // penultimate safe range, it won't get excluded because with the preferred
448
+ // seqno swapped in, it's now larger than the right boundary (itself before
449
+ // the swap). This is safe to do, because preferred seqno is swapped in only
450
+ // when no entries with the same user key exist on lower levels and this entry
451
+ // is already visible in the earliest snapshot.
452
+ std::optional<SequenceNumber> saved_seq_for_penul_check_ = kMaxSequenceNumber;
440
453
  // Stores whether ikey_.user_key is valid. If set to false, the user key is
441
454
  // not compared against the current key in the underlying iterator.
442
455
  bool has_current_user_key_ = false;
@@ -174,6 +174,9 @@ CompactionJob::CompactionJob(
174
174
  db_mutex_(db_mutex),
175
175
  db_error_handler_(db_error_handler),
176
176
  existing_snapshots_(std::move(existing_snapshots)),
177
+ earliest_snapshot_(existing_snapshots_.empty()
178
+ ? kMaxSequenceNumber
179
+ : existing_snapshots_.at(0)),
177
180
  earliest_write_conflict_snapshot_(earliest_write_conflict_snapshot),
178
181
  snapshot_checker_(snapshot_checker),
179
182
  job_context_(job_context),
@@ -282,6 +285,7 @@ void CompactionJob::Prepare() {
282
285
 
283
286
  // collect all seqno->time information from the input files which will be used
284
287
  // to encode seqno->time to the output files.
288
+
285
289
  uint64_t preserve_time_duration =
286
290
  std::max(c->immutable_options()->preserve_internal_time_seconds,
287
291
  c->immutable_options()->preclude_last_level_data_seconds);
@@ -319,28 +323,11 @@ void CompactionJob::Prepare() {
319
323
  seqno_to_time_mapping_.Enforce();
320
324
  } else {
321
325
  seqno_to_time_mapping_.Enforce(_current_time);
322
- uint64_t preserve_time =
323
- static_cast<uint64_t>(_current_time) > preserve_time_duration
324
- ? _current_time - preserve_time_duration
325
- : 0;
326
- // GetProximalSeqnoBeforeTime tells us the last seqno known to have been
327
- // written at or before the given time. + 1 to get the minimum we should
328
- // preserve without excluding anything that might have been written on or
329
- // after the given time.
330
- preserve_time_min_seqno_ =
331
- seqno_to_time_mapping_.GetProximalSeqnoBeforeTime(preserve_time) + 1;
332
- if (c->immutable_options()->preclude_last_level_data_seconds > 0) {
333
- uint64_t preclude_last_level_time =
334
- static_cast<uint64_t>(_current_time) >
335
- c->immutable_options()->preclude_last_level_data_seconds
336
- ? _current_time -
337
- c->immutable_options()->preclude_last_level_data_seconds
338
- : 0;
339
- preclude_last_level_min_seqno_ =
340
- seqno_to_time_mapping_.GetProximalSeqnoBeforeTime(
341
- preclude_last_level_time) +
342
- 1;
343
- }
326
+ seqno_to_time_mapping_.GetCurrentTieringCutoffSeqnos(
327
+ static_cast<uint64_t>(_current_time),
328
+ c->immutable_options()->preserve_internal_time_seconds,
329
+ c->immutable_options()->preclude_last_level_data_seconds,
330
+ &preserve_time_min_seqno_, &preclude_last_level_min_seqno_);
344
331
  }
345
332
  // For accuracy of the GetProximalSeqnoBeforeTime queries above, we only
346
333
  // limit the capacity after them.
@@ -481,6 +468,11 @@ void CompactionJob::GenSubcompactionBoundaries() {
481
468
  ReadOptions read_options(Env::IOActivity::kCompaction);
482
469
  read_options.rate_limiter_priority = GetRateLimiterPriority();
483
470
  auto* c = compact_->compaction;
471
+ if (c->immutable_options()->table_factory->Name() ==
472
+ TableFactory::kPlainTableName()) {
473
+ return;
474
+ }
475
+
484
476
  if (c->max_subcompactions() <= 1 &&
485
477
  !(c->immutable_options()->compaction_pri == kRoundRobin &&
486
478
  c->immutable_options()->compaction_style == kCompactionStyleLevel)) {
@@ -634,6 +626,7 @@ Status CompactionJob::Run() {
634
626
  const size_t num_threads = compact_->sub_compact_states.size();
635
627
  assert(num_threads > 0);
636
628
  const uint64_t start_micros = db_options_.clock->NowMicros();
629
+ compact_->compaction->GetOrInitInputTableProperties();
637
630
 
638
631
  // Launch a thread for each of subcompactions 1...num_threads-1
639
632
  std::vector<port::Thread> thread_pool;
@@ -1295,8 +1288,9 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
1295
1288
 
1296
1289
  auto c_iter = std::make_unique<CompactionIterator>(
1297
1290
  input, cfd->user_comparator(), &merge, versions_->LastSequence(),
1298
- &existing_snapshots_, earliest_write_conflict_snapshot_, job_snapshot_seq,
1299
- snapshot_checker_, env_, ShouldReportDetailedTime(env_, stats_),
1291
+ &existing_snapshots_, earliest_snapshot_,
1292
+ earliest_write_conflict_snapshot_, job_snapshot_seq, snapshot_checker_,
1293
+ env_, ShouldReportDetailedTime(env_, stats_),
1300
1294
  /*expect_valid_internal_key=*/true, range_del_agg.get(),
1301
1295
  blob_file_builder.get(), db_options_.allow_data_in_errors,
1302
1296
  db_options_.enforce_single_del_contracts, manual_compaction_canceled_,
@@ -1969,7 +1963,10 @@ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact,
1969
1963
  cfd->GetName(), sub_compact->compaction->output_level(),
1970
1964
  bottommost_level_, TableFileCreationReason::kCompaction,
1971
1965
  0 /* oldest_key_time */, current_time, db_id_, db_session_id_,
1972
- sub_compact->compaction->max_output_file_size(), file_number);
1966
+ sub_compact->compaction->max_output_file_size(), file_number,
1967
+ preclude_last_level_min_seqno_ == kMaxSequenceNumber
1968
+ ? preclude_last_level_min_seqno_
1969
+ : std::min(earliest_snapshot_, preclude_last_level_min_seqno_));
1973
1970
 
1974
1971
  outputs.NewBuilder(tboptions);
1975
1972
 
@@ -314,6 +314,8 @@ class CompactionJob {
314
314
  // deleted because that version is not visible in any snapshot.
315
315
  std::vector<SequenceNumber> existing_snapshots_;
316
316
 
317
+ SequenceNumber earliest_snapshot_;
318
+
317
319
  // This is the earliest snapshot that could be used for write-conflict
318
320
  // checking by a transaction. For any user-key newer than this snapshot, we
319
321
  // should make sure not to remove evidence that a write occurred.
@@ -27,7 +27,9 @@ Status CompactionOutputs::Finish(
27
27
  if (s.ok()) {
28
28
  SeqnoToTimeMapping relevant_mapping;
29
29
  relevant_mapping.CopyFromSeqnoRange(
30
- seqno_to_time_mapping, meta->fd.smallest_seqno, meta->fd.largest_seqno);
30
+ seqno_to_time_mapping,
31
+ std::min(smallest_preferred_seqno_, meta->fd.smallest_seqno),
32
+ meta->fd.largest_seqno);
31
33
  relevant_mapping.SetCapacity(kMaxSeqnoTimePairsPerSST);
32
34
  builder_->SetSeqnoTimeTableProperties(relevant_mapping,
33
35
  meta->oldest_ancester_time);
@@ -422,6 +424,11 @@ Status CompactionOutputs::AddToOutput(
422
424
  }
423
425
 
424
426
  const ParsedInternalKey& ikey = c_iter.ikey();
427
+ if (ikey.type == kTypeValuePreferredSeqno) {
428
+ SequenceNumber preferred_seqno = ParsePackedValueForSeqno(value);
429
+ smallest_preferred_seqno_ =
430
+ std::min(smallest_preferred_seqno_, preferred_seqno);
431
+ }
425
432
  s = current_output().meta.UpdateBoundaries(key, value, ikey.sequence,
426
433
  ikey.type);
427
434
 
@@ -297,6 +297,7 @@ class CompactionOutputs {
297
297
  std::unique_ptr<TableBuilder> builder_;
298
298
  std::unique_ptr<WritableFileWriter> file_writer_;
299
299
  uint64_t current_output_file_size_ = 0;
300
+ SequenceNumber smallest_preferred_seqno_ = kMaxSequenceNumber;
300
301
 
301
302
  // all the compaction outputs so far
302
303
  std::vector<Output> outputs_;
@@ -340,8 +340,8 @@ Compaction* CompactionPicker::CompactFiles(
340
340
  #ifndef NDEBUG
341
341
  assert(input_files.size());
342
342
  // This compaction output should not overlap with a running compaction as
343
- // `SanitizeCompactionInputFiles` should've checked earlier and db mutex
344
- // shouldn't have been released since.
343
+ // `SanitizeAndConvertCompactionInputFiles` should've checked earlier and db
344
+ // mutex shouldn't have been released since.
345
345
  int start_level = Compaction::kInvalidLevel;
346
346
  for (const auto& in : input_files) {
347
347
  // input_files should already be sorted by level
@@ -457,9 +457,10 @@ bool CompactionPicker::IsRangeInCompaction(VersionStorageInfo* vstorage,
457
457
  // Returns false if files on parent level are currently in compaction, which
458
458
  // means that we can't compact them
459
459
  bool CompactionPicker::SetupOtherInputs(
460
- const std::string& cf_name, VersionStorageInfo* vstorage,
461
- CompactionInputFiles* inputs, CompactionInputFiles* output_level_inputs,
462
- int* parent_index, int base_index, bool only_expand_towards_right) {
460
+ const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
461
+ VersionStorageInfo* vstorage, CompactionInputFiles* inputs,
462
+ CompactionInputFiles* output_level_inputs, int* parent_index,
463
+ int base_index, bool only_expand_towards_right) {
463
464
  assert(!inputs->empty());
464
465
  assert(output_level_inputs->empty());
465
466
  const int input_level = inputs->level;
@@ -525,8 +526,15 @@ bool CompactionPicker::SetupOtherInputs(
525
526
  if (!ExpandInputsToCleanCut(cf_name, vstorage, &expanded_inputs)) {
526
527
  try_overlapping_inputs = false;
527
528
  }
529
+ // It helps to reduce write amp and avoid a further separate compaction
530
+ // to include more input level files without expanding output level files.
531
+ // So we apply a softer limit. We still need a limit to avoid overly large
532
+ // compactions and potential high space amp spikes.
533
+ const uint64_t limit =
534
+ MultiplyCheckOverflow(mutable_cf_options.max_compaction_bytes, 2.0);
528
535
  if (try_overlapping_inputs && expanded_inputs.size() > inputs->size() &&
529
- !AreFilesInCompaction(expanded_inputs.files)) {
536
+ !AreFilesInCompaction(expanded_inputs.files) &&
537
+ output_level_inputs_size + expanded_inputs_size < limit) {
530
538
  InternalKey new_start, new_limit;
531
539
  GetRange(expanded_inputs, &new_start, &new_limit);
532
540
  CompactionInputFiles expanded_output_level_inputs;
@@ -548,7 +556,8 @@ bool CompactionPicker::SetupOtherInputs(
548
556
  base_index, nullptr);
549
557
  expanded_inputs_size = TotalFileSize(expanded_inputs.files);
550
558
  if (expanded_inputs.size() > inputs->size() &&
551
- !AreFilesInCompaction(expanded_inputs.files)) {
559
+ !AreFilesInCompaction(expanded_inputs.files) &&
560
+ (output_level_inputs_size + expanded_inputs_size) < limit) {
552
561
  expand_inputs = true;
553
562
  }
554
563
  }
@@ -808,8 +817,8 @@ Compaction* CompactionPicker::CompactRange(
808
817
  output_level_inputs.level = output_level;
809
818
  if (input_level != output_level) {
810
819
  int parent_index = -1;
811
- if (!SetupOtherInputs(cf_name, vstorage, &inputs, &output_level_inputs,
812
- &parent_index, -1)) {
820
+ if (!SetupOtherInputs(cf_name, mutable_cf_options, vstorage, &inputs,
821
+ &output_level_inputs, &parent_index, -1)) {
813
822
  // manual compaction is now multi-threaded, so it can
814
823
  // happen that SetupOtherInputs fails
815
824
  // we handle it higher in RunManualCompaction
@@ -1031,19 +1040,18 @@ Status CompactionPicker::SanitizeCompactionInputFilesForAllLevels(
1031
1040
  }
1032
1041
  }
1033
1042
  }
1034
- if (RangeOverlapWithCompaction(smallestkey, largestkey, output_level)) {
1035
- return Status::Aborted(
1036
- "A running compaction is writing to the same output level in an "
1037
- "overlapping key range");
1038
- }
1039
1043
  return Status::OK();
1040
1044
  }
1041
1045
 
1042
- Status CompactionPicker::SanitizeCompactionInputFiles(
1046
+ Status CompactionPicker::SanitizeAndConvertCompactionInputFiles(
1043
1047
  std::unordered_set<uint64_t>* input_files,
1044
- const ColumnFamilyMetaData& cf_meta, const int output_level) const {
1048
+ const ColumnFamilyMetaData& cf_meta, const int output_level,
1049
+ const VersionStorageInfo* vstorage,
1050
+ std::vector<CompactionInputFiles>* converted_input_files) const {
1045
1051
  assert(static_cast<int>(cf_meta.levels.size()) - 1 ==
1046
1052
  cf_meta.levels[cf_meta.levels.size() - 1].level);
1053
+ assert(converted_input_files);
1054
+
1047
1055
  if (output_level >= static_cast<int>(cf_meta.levels.size())) {
1048
1056
  return Status::InvalidArgument(
1049
1057
  "Output level for column family " + cf_meta.name +
@@ -1069,7 +1077,6 @@ Status CompactionPicker::SanitizeCompactionInputFiles(
1069
1077
 
1070
1078
  Status s = SanitizeCompactionInputFilesForAllLevels(input_files, cf_meta,
1071
1079
  output_level);
1072
-
1073
1080
  if (!s.ok()) {
1074
1081
  return s;
1075
1082
  }
@@ -1110,6 +1117,22 @@ Status CompactionPicker::SanitizeCompactionInputFiles(
1110
1117
  }
1111
1118
  }
1112
1119
 
1120
+ s = GetCompactionInputsFromFileNumbers(converted_input_files, input_files,
1121
+ vstorage, CompactionOptions());
1122
+ if (!s.ok()) {
1123
+ return s;
1124
+ }
1125
+ assert(converted_input_files->size() > 0);
1126
+ if (output_level != 0 &&
1127
+ FilesRangeOverlapWithCompaction(
1128
+ *converted_input_files, output_level,
1129
+ Compaction::EvaluatePenultimateLevel(
1130
+ vstorage, ioptions_, (*converted_input_files)[0].level,
1131
+ output_level))) {
1132
+ return Status::Aborted(
1133
+ "A running compaction is writing to the same output level(s) in an "
1134
+ "overlapping key range");
1135
+ }
1113
1136
  return Status::OK();
1114
1137
  }
1115
1138
 
@@ -86,16 +86,20 @@ class CompactionPicker {
86
86
 
87
87
  virtual bool NeedsCompaction(const VersionStorageInfo* vstorage) const = 0;
88
88
 
89
- // Sanitize the input set of compaction input files.
90
- // When the input parameters do not describe a valid compaction, the
91
- // function will try to fix the input_files by adding necessary
92
- // files. If it's not possible to conver an invalid input_files
93
- // into a valid one by adding more files, the function will return a
94
- // non-ok status with specific reason.
95
- //
96
- Status SanitizeCompactionInputFiles(std::unordered_set<uint64_t>* input_files,
97
- const ColumnFamilyMetaData& cf_meta,
98
- const int output_level) const;
89
+ // Sanitize the input set of compaction input files and convert it to
90
+ // `std::vector<CompactionInputFiles>` in the output parameter
91
+ // `converted_input_files`.
92
+ // When the input parameters do not describe a valid
93
+ // compaction, the function will try to fix the input_files by adding
94
+ // necessary files. If it's not possible to convert an invalid input_files
95
+ // into a valid one by adding more files, the function will return a
96
+ // non-ok status with specific reason.
97
+ //
98
+ Status SanitizeAndConvertCompactionInputFiles(
99
+ std::unordered_set<uint64_t>* input_files,
100
+ const ColumnFamilyMetaData& cf_meta, const int output_level,
101
+ const VersionStorageInfo* vstorage,
102
+ std::vector<CompactionInputFiles>* converted_input_files) const;
99
103
 
100
104
  // Free up the files that participated in a compaction
101
105
  //
@@ -109,8 +113,8 @@ class CompactionPicker {
109
113
  // object.
110
114
  //
111
115
  // Caller must provide a set of input files that has been passed through
112
- // `SanitizeCompactionInputFiles` earlier. The lock should not be released
113
- // between that call and this one.
116
+ // `SanitizeAndConvertCompactionInputFiles` earlier. The lock should not be
117
+ // released between that call and this one.
114
118
  Compaction* CompactFiles(const CompactionOptions& compact_options,
115
119
  const std::vector<CompactionInputFiles>& input_files,
116
120
  int output_level, VersionStorageInfo* vstorage,
@@ -120,6 +124,7 @@ class CompactionPicker {
120
124
 
121
125
  // Converts a set of compaction input file numbers into
122
126
  // a list of CompactionInputFiles.
127
+ // TODO(hx235): remove the unused parameter `compact_options`
123
128
  Status GetCompactionInputsFromFileNumbers(
124
129
  std::vector<CompactionInputFiles>* input_files,
125
130
  std::unordered_set<uint64_t>* input_set,
@@ -186,6 +191,7 @@ class CompactionPicker {
186
191
  int penultimate_level) const;
187
192
 
188
193
  bool SetupOtherInputs(const std::string& cf_name,
194
+ const MutableCFOptions& mutable_cf_options,
189
195
  VersionStorageInfo* vstorage,
190
196
  CompactionInputFiles* inputs,
191
197
  CompactionInputFiles* output_level_inputs,
@@ -224,8 +230,8 @@ class CompactionPicker {
224
230
  protected:
225
231
  const ImmutableOptions& ioptions_;
226
232
 
227
- // A helper function to SanitizeCompactionInputFiles() that
228
- // sanitizes "input_files" by adding necessary files.
233
+ // A helper function to SanitizeAndConvertCompactionInputFiles() that
234
+ // sanitizes "input_files" by adding necessary files.
229
235
  virtual Status SanitizeCompactionInputFilesForAllLevels(
230
236
  std::unordered_set<uint64_t>* input_files,
231
237
  const ColumnFamilyMetaData& cf_meta, const int output_level) const;
@@ -467,8 +467,9 @@ bool LevelCompactionBuilder::SetupOtherInputsIfNeeded() {
467
467
  }
468
468
  if (!is_l0_trivial_move_ &&
469
469
  !compaction_picker_->SetupOtherInputs(
470
- cf_name_, vstorage_, &start_level_inputs_, &output_level_inputs_,
471
- &parent_index_, base_index_, round_robin_expanding)) {
470
+ cf_name_, mutable_cf_options_, vstorage_, &start_level_inputs_,
471
+ &output_level_inputs_, &parent_index_, base_index_,
472
+ round_robin_expanding)) {
472
473
  return false;
473
474
  }
474
475
 
@@ -924,11 +925,15 @@ bool LevelCompactionBuilder::PickSizeBasedIntraL0Compaction() {
924
925
  }
925
926
  uint64_t l0_size = 0;
926
927
  for (const auto& file : l0_files) {
927
- l0_size += file->fd.GetFileSize();
928
+ assert(file->compensated_file_size >= file->fd.GetFileSize());
929
+ // Compact down L0s with more deletions.
930
+ l0_size += file->compensated_file_size;
928
931
  }
929
- const uint64_t min_lbase_size =
930
- l0_size * static_cast<uint64_t>(std::max(
931
- 10.0, mutable_cf_options_.max_bytes_for_level_multiplier));
932
+
933
+ // Avoid L0->Lbase compactions that are inefficient for write-amp.
934
+ const double kMultiplier =
935
+ std::max(10.0, mutable_cf_options_.max_bytes_for_level_multiplier) * 2;
936
+ const uint64_t min_lbase_size = MultiplyCheckOverflow(l0_size, kMultiplier);
932
937
  assert(min_lbase_size >= l0_size);
933
938
  const std::vector<FileMetaData*>& lbase_files =
934
939
  vstorage_->LevelFiles(/*level=*/base_level);