@nxtedition/rocksdb 15.4.1 → 16.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (401)
  1. package/binding.cc +70 -23
  2. package/deps/rocksdb/rocksdb/.clang-tidy +86 -0
  3. package/deps/rocksdb/rocksdb/BUCK +42 -0
  4. package/deps/rocksdb/rocksdb/CMakeLists.txt +11 -0
  5. package/deps/rocksdb/rocksdb/Makefile +59 -32
  6. package/deps/rocksdb/rocksdb/cache/cache.cc +0 -5
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +9 -9
  8. package/deps/rocksdb/rocksdb/cache/cache_key.cc +3 -3
  9. package/deps/rocksdb/rocksdb/cache/cache_key.h +5 -5
  10. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +16 -16
  11. package/deps/rocksdb/rocksdb/cache/cache_test.cc +1 -1
  12. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +258 -294
  13. package/deps/rocksdb/rocksdb/cache/clock_cache.h +98 -49
  14. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -5
  15. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +2 -3
  16. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +18 -18
  17. package/deps/rocksdb/rocksdb/crash_test.mk +5 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -22
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +6 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +14 -16
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +38 -26
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +101 -18
  24. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +12 -0
  25. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +6 -9
  26. package/deps/rocksdb/rocksdb/db/builder.cc +23 -0
  27. package/deps/rocksdb/rocksdb/db/builder.h +7 -0
  28. package/deps/rocksdb/rocksdb/db/c.cc +373 -57
  29. package/deps/rocksdb/rocksdb/db/c_test.c +101 -1
  30. package/deps/rocksdb/rocksdb/db/column_family.cc +31 -3
  31. package/deps/rocksdb/rocksdb/db/column_family_test.cc +10 -13
  32. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +35 -48
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +13 -5
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +201 -39
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -10
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +7 -7
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -455
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +19 -0
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +72 -9
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -10
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +405 -83
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +25 -1
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -10
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1410 -106
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -5
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +2 -1
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +19 -10
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +505 -45
  51. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
  52. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +9 -1
  53. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +4 -4
  54. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +7 -9
  55. package/deps/rocksdb/rocksdb/db/convenience.cc +4 -4
  56. package/deps/rocksdb/rocksdb/db/convenience_impl.h +2 -1
  57. package/deps/rocksdb/rocksdb/db/corruption_test.cc +60 -88
  58. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +10 -12
  59. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +471 -40
  60. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +116 -2
  61. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +5 -15
  62. package/deps/rocksdb/rocksdb/db/db_compaction_abort_test.cc +993 -0
  63. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +329 -29
  64. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +155 -13
  65. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +54 -31
  66. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -0
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +232 -70
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +57 -9
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +224 -31
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +4 -2
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +164 -8
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -0
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +5 -0
  77. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +47 -35
  78. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +22 -9
  79. package/deps/rocksdb/rocksdb/db/db_iter.cc +9 -0
  80. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +371 -6
  81. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -5
  82. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +22 -23
  83. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +0 -2
  84. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +4 -4
  85. package/deps/rocksdb/rocksdb/db/db_options_test.cc +40 -0
  86. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +32 -13
  87. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1 -1
  88. package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +4 -4
  89. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +68 -15
  90. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1 -1
  91. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +2 -3
  92. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -21
  93. package/deps/rocksdb/rocksdb/db/db_test.cc +644 -128
  94. package/deps/rocksdb/rocksdb/db/db_test2.cc +198 -81
  95. package/deps/rocksdb/rocksdb/db/db_test_util.cc +35 -10
  96. package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -2
  97. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +36 -32
  98. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +11 -7
  99. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +499 -0
  100. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +284 -20
  101. package/deps/rocksdb/rocksdb/db/db_write_test.cc +3 -3
  102. package/deps/rocksdb/rocksdb/db/dbformat.h +0 -5
  103. package/deps/rocksdb/rocksdb/db/error_handler.cc +24 -0
  104. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +12 -14
  105. package/deps/rocksdb/rocksdb/db/experimental.cc +13 -10
  106. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -1
  107. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +22 -3
  108. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +21 -15
  109. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +4 -6
  110. package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -3
  111. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +5 -6
  112. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +4 -2
  113. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +17 -17
  114. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -0
  115. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  116. package/deps/rocksdb/rocksdb/db/listener_test.cc +154 -27
  117. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -6
  118. package/deps/rocksdb/rocksdb/db/memtable.cc +197 -51
  119. package/deps/rocksdb/rocksdb/db/memtable.h +6 -0
  120. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +3 -4
  121. package/deps/rocksdb/rocksdb/db/merge_test.cc +37 -35
  122. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +2 -1
  123. package/deps/rocksdb/rocksdb/db/options_file_test.cc +4 -4
  124. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +9 -11
  125. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +10 -1
  126. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +292 -15
  127. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +10 -17
  128. package/deps/rocksdb/rocksdb/db/prefix_test.cc +6 -8
  129. package/deps/rocksdb/rocksdb/db/repair.cc +10 -10
  130. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -5
  131. package/deps/rocksdb/rocksdb/db/table_cache.cc +142 -135
  132. package/deps/rocksdb/rocksdb/db/table_cache.h +30 -6
  133. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -7
  134. package/deps/rocksdb/rocksdb/db/version_builder.cc +11 -50
  135. package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
  136. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +2 -1
  137. package/deps/rocksdb/rocksdb/db/version_edit.cc +51 -2
  138. package/deps/rocksdb/rocksdb/db/version_edit.h +91 -29
  139. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -7
  140. package/deps/rocksdb/rocksdb/db/version_set.cc +211 -50
  141. package/deps/rocksdb/rocksdb/db/version_set.h +40 -3
  142. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
  143. package/deps/rocksdb/rocksdb/db/version_set_test.cc +294 -21
  144. package/deps/rocksdb/rocksdb/db/version_util.cc +96 -0
  145. package/deps/rocksdb/rocksdb/db/version_util.h +24 -0
  146. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +5 -5
  147. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +647 -31
  148. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +219 -1
  149. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +549 -12
  150. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -3
  151. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +1 -1
  152. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -0
  153. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +21 -4
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +32 -0
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +74 -22
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +9 -0
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -61
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -2
  159. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +76 -2
  160. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +92 -72
  161. package/deps/rocksdb/rocksdb/env/env.cc +1 -0
  162. package/deps/rocksdb/rocksdb/env/env_test.cc +365 -2
  163. package/deps/rocksdb/rocksdb/env/fs_posix.cc +31 -30
  164. package/deps/rocksdb/rocksdb/env/io_posix.cc +8 -11
  165. package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
  166. package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
  167. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -1
  168. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +108 -0
  169. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +32 -4
  170. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +4 -4
  171. package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
  172. package/deps/rocksdb/rocksdb/file/file_util.h +2 -1
  173. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +331 -12
  174. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +52 -35
  175. package/deps/rocksdb/rocksdb/folly.mk +22 -5
  176. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +1 -1
  177. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +100 -54
  178. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +67 -2
  179. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +149 -13
  180. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -12
  181. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +78 -97
  182. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +3 -3
  183. package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +2 -2
  184. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +5 -0
  185. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +17 -2
  186. package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +1 -1
  187. package/deps/rocksdb/rocksdb/include/rocksdb/io_dispatcher.h +358 -0
  188. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +13 -0
  189. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +43 -0
  190. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +20 -0
  191. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +63 -21
  192. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +10 -1
  193. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +1 -1
  194. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +2 -7
  195. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +13 -0
  196. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -14
  197. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +49 -9
  198. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +8 -0
  199. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +77 -6
  200. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +15 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +16 -10
  202. package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +5 -5
  203. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +2 -4
  204. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +106 -46
  205. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +1 -1
  206. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +14 -1
  207. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +5 -1
  208. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +2 -1
  209. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -9
  210. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  211. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -2
  212. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +2 -2
  213. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +226 -8
  214. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +490 -0
  215. package/deps/rocksdb/rocksdb/memtable/skiplist.h +3 -3
  216. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +11 -0
  217. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +4 -12
  218. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +5 -5
  219. package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +21 -4
  220. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +9 -3
  221. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +21 -2
  222. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +2 -2
  223. package/deps/rocksdb/rocksdb/options/cf_options.cc +21 -1
  224. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  225. package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -2
  226. package/deps/rocksdb/rocksdb/options/db_options.cc +26 -5
  227. package/deps/rocksdb/rocksdb/options/db_options.h +3 -1
  228. package/deps/rocksdb/rocksdb/options/options.cc +5 -1
  229. package/deps/rocksdb/rocksdb/options/options_helper.cc +7 -2
  230. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +109 -103
  231. package/deps/rocksdb/rocksdb/options/options_test.cc +14 -0
  232. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +15 -17
  233. package/deps/rocksdb/rocksdb/port/lang.h +4 -0
  234. package/deps/rocksdb/rocksdb/port/port_example.h +0 -23
  235. package/deps/rocksdb/rocksdb/port/stack_trace.cc +36 -0
  236. package/deps/rocksdb/rocksdb/port/stack_trace.h +9 -0
  237. package/deps/rocksdb/rocksdb/src.mk +12 -0
  238. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +1 -2
  239. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  240. package/deps/rocksdb/rocksdb/table/block_based/block.cc +571 -292
  241. package/deps/rocksdb/rocksdb/table/block_based/block.h +143 -53
  242. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +154 -90
  243. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +5 -1
  244. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +51 -14
  245. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +0 -2
  246. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +147 -734
  247. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +30 -233
  248. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +178 -108
  249. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +13 -0
  250. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +17 -4
  251. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -2
  252. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +70 -0
  253. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +168 -24
  254. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -9
  255. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +7 -4
  256. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +9 -2
  257. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +548 -169
  258. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
  259. package/deps/rocksdb/rocksdb/table/block_based/block_util.h +156 -0
  260. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +73 -30
  261. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +74 -7
  262. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +1 -1
  263. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +20 -14
  264. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +22 -12
  265. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +1 -1
  266. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.cc +332 -0
  267. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.h +133 -0
  268. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -2
  269. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
  270. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +3 -2
  271. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +4 -1
  272. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +0 -1
  273. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +126 -46
  274. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +31 -3
  275. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +1 -2
  276. package/deps/rocksdb/rocksdb/table/cleanable_test.cc +3 -1
  277. package/deps/rocksdb/rocksdb/table/external_table.cc +25 -4
  278. package/deps/rocksdb/rocksdb/table/format.cc +27 -15
  279. package/deps/rocksdb/rocksdb/table/format.h +41 -15
  280. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -0
  281. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +22 -12
  282. package/deps/rocksdb/rocksdb/table/meta_blocks.h +0 -1
  283. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +7 -21
  284. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
  285. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +88 -13
  286. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +53 -42
  287. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +3 -12
  288. package/deps/rocksdb/rocksdb/table/table_builder.h +0 -4
  289. package/deps/rocksdb/rocksdb/table/table_properties.cc +18 -0
  290. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -3
  291. package/deps/rocksdb/rocksdb/table/table_test.cc +848 -172
  292. package/deps/rocksdb/rocksdb/table/unique_id.cc +24 -20
  293. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +8 -8
  294. package/deps/rocksdb/rocksdb/test_util/sync_point.h +5 -4
  295. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -1
  296. package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -2
  297. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +2 -1
  298. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +238 -120
  299. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +2 -2
  300. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +2 -4
  301. package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +4 -8
  302. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +1 -1
  303. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +2 -3
  304. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +82 -20
  305. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +41 -47
  306. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -0
  307. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +5 -6
  308. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +1 -1
  309. package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +6 -5
  310. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +4 -4
  311. package/deps/rocksdb/rocksdb/tools/write_stress.cc +1 -3
  312. package/deps/rocksdb/rocksdb/util/atomic.h +30 -23
  313. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +6 -7
  314. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +3 -3
  315. package/deps/rocksdb/rocksdb/util/bit_fields.h +68 -46
  316. package/deps/rocksdb/rocksdb/util/bloom_impl.h +16 -16
  317. package/deps/rocksdb/rocksdb/util/coding.h +14 -27
  318. package/deps/rocksdb/rocksdb/util/compression.cc +365 -207
  319. package/deps/rocksdb/rocksdb/util/compression.h +16 -1298
  320. package/deps/rocksdb/rocksdb/util/compression_test.cc +347 -61
  321. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +8 -9
  322. package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +1 -1
  323. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +1 -1
  324. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +3 -3
  325. package/deps/rocksdb/rocksdb/util/filter_bench.cc +18 -18
  326. package/deps/rocksdb/rocksdb/util/gflags_compat.h +3 -3
  327. package/deps/rocksdb/rocksdb/util/hash_test.cc +19 -7
  328. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.cc +1099 -0
  329. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.h +36 -0
  330. package/deps/rocksdb/rocksdb/util/io_dispatcher_test.cc +1919 -0
  331. package/deps/rocksdb/rocksdb/util/math.h +3 -1
  332. package/deps/rocksdb/rocksdb/util/mutexlock.h +19 -19
  333. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +25 -25
  334. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +5 -7
  335. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -5
  336. package/deps/rocksdb/rocksdb/util/slice.cc +0 -10
  337. package/deps/rocksdb/rocksdb/util/slice_test.cc +35 -1
  338. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +5 -7
  339. package/deps/rocksdb/rocksdb/util/status.cc +3 -1
  340. package/deps/rocksdb/rocksdb/util/stop_watch.h +2 -0
  341. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -1
  342. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +123 -78
  343. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +12 -93
  344. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -4
  345. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +0 -21
  346. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +6 -48
  347. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +94 -307
  348. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +12 -58
  349. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +2 -8
  350. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +2 -3
  351. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +205 -811
  352. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +18 -9
  353. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +2 -7
  354. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +1 -9
  355. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +17 -11
  356. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +1 -1
  357. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +1 -1
  358. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
  359. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +68 -61
  360. package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -1
  361. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +105 -59
  362. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -7
  363. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs_test.cc +94 -0
  364. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +13 -17
  365. package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +16 -3
  366. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +25 -25
  367. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +40 -40
  368. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +2 -5
  369. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +17 -19
  370. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +2 -2
  371. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +2 -2
  372. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +1 -1
  373. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +2 -2
  374. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +4 -13
  375. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +3 -3
  376. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
  377. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_seqno_test.cc +431 -0
  378. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -2
  379. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +91 -0
  380. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.cc +562 -0
  381. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.h +615 -0
  382. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.cc +2575 -0
  383. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.h +685 -0
  384. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_db_test.cc +2843 -0
  385. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.cc +567 -0
  386. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.h +275 -0
  387. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_test.cc +5183 -0
  388. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +4 -3
  389. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  390. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
  391. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +3 -3
  392. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +93 -88
  393. package/deps/rocksdb/rocksdb.gyp +7 -0
  394. package/index.js +70 -10
  395. package/iterator.js +25 -3
  396. package/max_rev_operator.h +9 -5
  397. package/package.json +1 -1
  398. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  399. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  400. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +0 -43
  401. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +0 -55
@@ -320,6 +320,22 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) {
320
320
 
321
321
  WaitForBackgroundWork();
322
322
 
323
+ TEST_SYNC_POINT("DBImpl::ResumeImpl:Start");
324
+
325
+ // With two_write_queues=true, sequence numbers are allocated via
326
+ // FetchAddLastAllocatedSequence() before writes complete, but only
327
+ // published via SetLastSequence() after success. If we're recovering from
328
+ // an error, there may be allocated-but-not-published sequence numbers.
329
+ // We must sync last_sequence_ with last_allocated_sequence_ before creating
330
+ // any new memtables/WALs, otherwise the new WAL could start with a sequence
331
+ // number lower than what was already written, causing "sequence number
332
+ // going backwards" corruption on subsequent recovery.
333
+ if (immutable_db_options_.two_write_queues) {
334
+ versions_->SyncLastSequenceWithAllocated();
335
+ }
336
+
337
+ TEST_SYNC_POINT("DBImpl::ResumeImpl:AfterSyncSeq");
338
+
323
339
  Status s;
324
340
  if (shutdown_initiated_) {
325
341
  // Returning shutdown status to SFM during auto recovery will cause it
@@ -456,7 +472,7 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) {
456
472
  void DBImpl::WaitForBackgroundWork() {
457
473
  // Wait for background work to finish
458
474
  while (bg_bottom_compaction_scheduled_ || bg_compaction_scheduled_ ||
459
- bg_flush_scheduled_) {
475
+ bg_flush_scheduled_ || bg_pressure_callback_in_progress_) {
460
476
  bg_cv_.Wait();
461
477
  }
462
478
  }
@@ -545,11 +561,18 @@ Status DBImpl::CloseHelper() {
545
561
  // Wait for background work to finish
546
562
  while (bg_bottom_compaction_scheduled_ || bg_compaction_scheduled_ ||
547
563
  bg_flush_scheduled_ || bg_purge_scheduled_ ||
564
+ bg_pressure_callback_in_progress_ ||
565
+ bg_async_file_open_state_ == AsyncFileOpenState::kScheduled ||
548
566
  pending_purge_obsolete_files_ ||
549
567
  error_handler_.IsRecoveryInProgress()) {
550
568
  TEST_SYNC_POINT("DBImpl::~DBImpl:WaitJob");
551
569
  bg_cv_.Wait();
552
570
  }
571
+
572
+ // Ensure subclasses don't forget to schedule async file opening
573
+ assert(!immutable_db_options_.open_files_async || !opened_successfully_ ||
574
+ bg_async_file_open_state_ != AsyncFileOpenState::kNotScheduled);
575
+
553
576
  TEST_SYNC_POINT_CALLBACK("DBImpl::CloseHelper:PendingPurgeFinished",
554
577
  &files_grabbed_for_purge_);
555
578
  EraseThreadStatusDbInfo();
@@ -768,7 +791,76 @@ void DBImpl::PrintStatistics() {
768
791
  }
769
792
  }
770
793
 
794
+ // Computes the minimum time-based compaction interval for a CF based on
795
+ // various options.
796
+ // Returns 0 if all time-based compaction options are disabled.
797
+ static uint64_t GetMinTimeBasedCompactionInterval(
798
+ const ColumnFamilyOptions& cf_opts) {
799
+ uint64_t min_interval = UINT64_MAX;
800
+ if (cf_opts.periodic_compaction_seconds > 0) {
801
+ min_interval = std::min(min_interval, cf_opts.periodic_compaction_seconds);
802
+ }
803
+ if (cf_opts.ttl > 0) {
804
+ min_interval = std::min(min_interval, cf_opts.ttl);
805
+ }
806
+ const auto& fifo_thresholds =
807
+ cf_opts.compaction_options_fifo.file_temperature_age_thresholds;
808
+ if (!fifo_thresholds.empty() && fifo_thresholds[0].age > 0) {
809
+ // Thresholds are in increasing order by age, so first is smallest
810
+ min_interval = std::min(min_interval, fifo_thresholds[0].age);
811
+ }
812
+ if (cf_opts.bottommost_file_compaction_delay > 0) {
813
+ // NOTE: 0 does not exactly mean "disabled" in this case but it does mean
814
+ // there's no time component to the relevant compaction picking.
815
+ min_interval = std::min(min_interval,
816
+ uint64_t{cf_opts.bottommost_file_compaction_delay});
817
+ }
818
+ // Note: Assume sentinel values like UINT64_MAX - 1 (used by ttl and
819
+ // periodic_compaction_seconds) are like disabling, if they reach here
820
+ // unsanitized.
821
+ return min_interval > UINT64_MAX / 2 ? 0 : min_interval;
822
+ }
823
+
824
+ uint64_t DBImpl::ComputeTriggerCompactionPeriod() {
825
+ // Start with a maximum period of every 12 hours.
826
+ uint64_t period_sec = 12 * 60 * 60;
827
+
828
+ // Consider DB-level options that have the DB waking up periodically anyway.
829
+ // Waking up to check for compactions at the same interval should be no
830
+ // problem, as it should be less overhead than these.
831
+ if (mutable_db_options_.stats_dump_period_sec > 0) {
832
+ period_sec = std::min(period_sec,
833
+ (uint64_t)mutable_db_options_.stats_dump_period_sec);
834
+ }
835
+ if (mutable_db_options_.stats_persist_period_sec > 0) {
836
+ period_sec = std::min(
837
+ period_sec, (uint64_t)mutable_db_options_.stats_persist_period_sec);
838
+ }
839
+
840
+ // Consider per-CF settings that can trigger compaction based on time.
841
+ uint64_t compaction_trigger_sec = UINT64_MAX;
842
+ for (auto cfd : *versions_->GetColumnFamilySet()) {
843
+ if (cfd->IsDropped()) {
844
+ continue;
845
+ }
846
+ uint64_t cf_min =
847
+ GetMinTimeBasedCompactionInterval(cfd->GetLatestCFOptions());
848
+ if (cf_min > 0) {
849
+ compaction_trigger_sec = std::min(compaction_trigger_sec, cf_min);
850
+ }
851
+ }
852
+ // We might not align with those timings perfectly, so we tolerate only some
853
+ // proportional delay, up to 1/kTriggerDivisor ~= 20%.
854
+ constexpr uint64_t kTriggerDivisor = 5;
855
+ period_sec = std::min(period_sec, compaction_trigger_sec / kTriggerDivisor);
856
+ // But must be > 0
857
+ period_sec = std::max(period_sec, uint64_t{1});
858
+
859
+ return period_sec;
860
+ }
861
+
771
862
  Status DBImpl::StartPeriodicTaskScheduler() {
863
+ Status s;
772
864
  #ifndef NDEBUG
773
865
  // It only used by test to disable scheduler
774
866
  bool disable_scheduler = false;
@@ -776,7 +868,7 @@ Status DBImpl::StartPeriodicTaskScheduler() {
776
868
  "DBImpl::StartPeriodicTaskScheduler:DisableScheduler",
777
869
  &disable_scheduler);
778
870
  if (disable_scheduler) {
779
- return Status::OK();
871
+ return s;
780
872
  }
781
873
 
782
874
  {
@@ -787,7 +879,7 @@ Status DBImpl::StartPeriodicTaskScheduler() {
787
879
 
788
880
  #endif // !NDEBUG
789
881
  if (mutable_db_options_.stats_dump_period_sec > 0) {
790
- Status s = periodic_task_scheduler_.Register(
882
+ s = periodic_task_scheduler_.Register(
791
883
  PeriodicTaskType::kDumpStats,
792
884
  periodic_task_functions_.at(PeriodicTaskType::kDumpStats),
793
885
  mutable_db_options_.stats_dump_period_sec,
@@ -797,7 +889,7 @@ Status DBImpl::StartPeriodicTaskScheduler() {
797
889
  }
798
890
  }
799
891
  if (mutable_db_options_.stats_persist_period_sec > 0) {
800
- Status s = periodic_task_scheduler_.Register(
892
+ s = periodic_task_scheduler_.Register(
801
893
  PeriodicTaskType::kPersistStats,
802
894
  periodic_task_functions_.at(PeriodicTaskType::kPersistStats),
803
895
  mutable_db_options_.stats_persist_period_sec,
@@ -807,18 +899,19 @@ Status DBImpl::StartPeriodicTaskScheduler() {
807
899
  }
808
900
  }
809
901
 
810
- Status s = periodic_task_scheduler_.Register(
902
+ s = periodic_task_scheduler_.Register(
811
903
  PeriodicTaskType::kFlushInfoLog,
812
904
  periodic_task_functions_.at(PeriodicTaskType::kFlushInfoLog),
813
905
  /*run_immediately=*/true);
814
-
815
- if (s.ok()) {
816
- s = periodic_task_scheduler_.Register(
817
- PeriodicTaskType::kTriggerCompaction,
818
- periodic_task_functions_.at(PeriodicTaskType::kTriggerCompaction),
819
- /*run_immediately=*/false);
906
+ if (!s.ok()) {
907
+ return s;
820
908
  }
821
909
 
910
+ s = periodic_task_scheduler_.Register(
911
+ PeriodicTaskType::kTriggerCompaction,
912
+ periodic_task_functions_.at(PeriodicTaskType::kTriggerCompaction),
913
+ ComputeTriggerCompactionPeriod(), /*run_immediately=*/false);
914
+
822
915
  return s;
823
916
  }
824
917
 
@@ -1177,23 +1270,38 @@ FSDirectory* DBImpl::GetDataDir(ColumnFamilyData* cfd, size_t path_id) const {
1177
1270
  }
1178
1271
 
1179
1272
  Status DBImpl::SetOptions(
1180
- ColumnFamilyHandle* column_family,
1181
- const std::unordered_map<std::string, std::string>& options_map) {
1273
+ const std::unordered_map<ColumnFamilyHandle*,
1274
+ std::unordered_map<std::string, std::string>>&
1275
+ column_families_opts_map) {
1182
1276
  // TODO: plumb Env::IOActivity, Env::IOPriority
1183
1277
  const ReadOptions read_options;
1184
1278
  const WriteOptions write_options;
1185
1279
 
1186
- auto* cfd =
1187
- static_cast_with_check<ColumnFamilyHandleImpl>(column_family)->cfd();
1188
- if (options_map.empty()) {
1189
- ROCKS_LOG_WARN(immutable_db_options_.info_log,
1190
- "SetOptions() on column family [%s], empty input",
1191
- cfd->GetName().c_str());
1192
- return Status::InvalidArgument("empty input");
1280
+ if (column_families_opts_map.empty()) {
1281
+ return Status::OK();
1282
+ }
1283
+
1284
+ for (const auto& cf_opts : column_families_opts_map) {
1285
+ if (cf_opts.second.empty()) {
1286
+ ROCKS_LOG_WARN(immutable_db_options_.info_log,
1287
+ "SetOptions() on column family [%s], empty input",
1288
+ cf_opts.first->GetName().c_str());
1289
+ return Status::InvalidArgument("empty input");
1290
+ }
1291
+ }
1292
+
1293
+ autovector<std::pair<ColumnFamilyData*,
1294
+ const std::unordered_map<std::string, std::string>*>>
1295
+ column_family_datas;
1296
+ for (const auto& cf_opts : column_families_opts_map) {
1297
+ column_family_datas.push_back(
1298
+ {static_cast_with_check<ColumnFamilyHandleImpl>(cf_opts.first)->cfd(),
1299
+ &cf_opts.second});
1193
1300
  }
1194
1301
 
1195
1302
  InstrumentedMutexLock ol(&options_mutex_);
1196
- MutableCFOptions new_options_copy; // For logging outside of DB mutex
1303
+ autovector<MutableCFOptions>
1304
+ new_options_copy; // For logging outside of DB mutex
1197
1305
  Status s;
1198
1306
  Status persist_options_status;
1199
1307
  SuperVersionContext sv_context(/* create_superversion */ true);
@@ -1216,68 +1324,104 @@ Status DBImpl::SetOptions(
1216
1324
  // Thus aren't releasing the DB mutex from LogAndApply calling pre_cb,
1217
1325
  // through installing the new Version until the end of this block, after
1218
1326
  // installing the new SuperVersion.
1219
- auto pre_cb = [&]() -> Status {
1220
- Status cb_s = cfd->SetOptions(db_options, options_map);
1221
- if (cb_s.ok()) {
1222
- new_options_copy = cfd->GetLatestMutableCFOptions();
1223
- }
1224
- return cb_s;
1225
- };
1226
1327
  VersionEdit dummy_edit;
1227
1328
  dummy_edit.MarkNoManifestWriteDummy();
1228
1329
  TEST_SYNC_POINT_CALLBACK("DBImpl::SetOptions:dummy_edit", &dummy_edit);
1229
- s = versions_->LogAndApply(
1230
- cfd, read_options, write_options, &dummy_edit, &mutex_,
1231
- directories_.GetDbDir(), false /*new_descriptor_log=*/,
1232
- nullptr /*new_opts*/, {} /*manifest_wcb*/, pre_cb);
1233
- if (!versions_->io_status().ok()) {
1234
- assert(!s.ok());
1235
- error_handler_.SetBGError(versions_->io_status(),
1236
- BackgroundErrorReason::kManifestWrite);
1330
+ for (const auto& cfd_opts : column_family_datas) {
1331
+ auto* cfd = cfd_opts.first;
1332
+ const auto* options_map_ptr = cfd_opts.second;
1333
+ auto pre_cb = [&]() -> Status {
1334
+ Status cb_s = cfd->SetOptions(db_options, *options_map_ptr);
1335
+ if (cb_s.ok()) {
1336
+ new_options_copy.emplace_back(cfd->GetLatestMutableCFOptions());
1337
+ }
1338
+ return cb_s;
1339
+ };
1340
+
1341
+ s = versions_->LogAndApply(
1342
+ cfd, read_options, write_options, &dummy_edit, &mutex_,
1343
+ directories_.GetDbDir(), false /*new_descriptor_log=*/,
1344
+ nullptr /*new_opts*/, {} /*manifest_wcb*/, pre_cb);
1345
+ if (!versions_->io_status().ok()) {
1346
+ assert(!s.ok());
1347
+ error_handler_.SetBGError(versions_->io_status(),
1348
+ BackgroundErrorReason::kManifestWrite);
1349
+ }
1350
+ if (!s.ok()) {
1351
+ break;
1352
+ }
1237
1353
  }
1238
1354
 
1239
1355
  if (s.ok()) {
1240
1356
  // Trigger possible flush/compactions. This has to be before we persist
1241
1357
  // options to file, otherwise there will be a deadlock with writer
1242
1358
  // thread.
1243
- InstallSuperVersionForConfigChange(cfd, &sv_context);
1359
+ for (const auto& cfd_opts : column_family_datas) {
1360
+ InstallSuperVersionForConfigChange(cfd_opts.first, &sv_context);
1361
+ }
1244
1362
  persist_options_status =
1245
1363
  WriteOptionsFile(write_options, true /*db_mutex_already_held*/);
1246
1364
  bg_cv_.SignalAll();
1247
1365
 
1248
- assert(new_options_copy == cfd->GetLatestMutableCFOptions());
1249
- assert(cfd->GetLatestMutableCFOptions() ==
1250
- cfd->GetCurrentMutableCFOptions());
1251
- assert(cfd->GetCurrentMutableCFOptions() ==
1252
- cfd->current()->GetMutableCFOptions());
1366
+ #ifndef NDEBUG
1367
+ for (size_t i = 0; i < column_family_datas.size(); ++i) {
1368
+ auto* cfd = column_family_datas[i].first;
1369
+ assert(new_options_copy[i] == cfd->GetLatestMutableCFOptions());
1370
+ assert(cfd->GetLatestMutableCFOptions() ==
1371
+ cfd->GetCurrentMutableCFOptions());
1372
+ assert(cfd->GetCurrentMutableCFOptions() ==
1373
+ cfd->current()->GetMutableCFOptions());
1374
+ }
1375
+ #endif
1253
1376
  }
1254
1377
  }
1255
1378
  sv_context.Clean();
1256
1379
 
1257
- if (s.ok() && (options_map.count("preserve_internal_time_seconds") > 0 ||
1258
- options_map.count("preclude_last_level_data_seconds") > 0)) {
1259
- s = RegisterRecordSeqnoTimeWorker();
1380
+ if (s.ok()) {
1381
+ bool needs_seqno_worker = false;
1382
+ for (const auto& cf_opts : column_families_opts_map) {
1383
+ if (cf_opts.second.count("preserve_internal_time_seconds") > 0 ||
1384
+ cf_opts.second.count("preclude_last_level_data_seconds") > 0) {
1385
+ needs_seqno_worker = true;
1386
+ break;
1387
+ }
1388
+ }
1389
+ if (needs_seqno_worker) {
1390
+ s = RegisterRecordSeqnoTimeWorker();
1391
+ }
1260
1392
  }
1261
1393
 
1262
- ROCKS_LOG_INFO(
1263
- immutable_db_options_.info_log,
1264
- "SetOptions() on column family [%s], inputs:", cfd->GetName().c_str());
1265
- for (const auto& o : options_map) {
1266
- ROCKS_LOG_INFO(immutable_db_options_.info_log, "%s: %s\n", o.first.c_str(),
1267
- o.second.c_str());
1394
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
1395
+ "SetOptions() on [%zu] column families, inputs:",
1396
+ column_family_datas.size());
1397
+ for (size_t i = 0; i < column_family_datas.size(); ++i) {
1398
+ const auto* cfd = column_family_datas[i].first;
1399
+ const auto* options_map_ptr = column_family_datas[i].second;
1400
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
1401
+ "Set options on column family [%s] (%zu/%zu), inputs:",
1402
+ cfd->GetName().c_str(), i, column_family_datas.size());
1403
+ for (const auto& o : *options_map_ptr) {
1404
+ ROCKS_LOG_INFO(immutable_db_options_.info_log, "%s: %s\n",
1405
+ o.first.c_str(), o.second.c_str());
1406
+ }
1268
1407
  }
1269
1408
  if (s.ok()) {
1270
- ROCKS_LOG_INFO(immutable_db_options_.info_log,
1271
- "[%s] SetOptions() succeeded", cfd->GetName().c_str());
1272
- new_options_copy.Dump(immutable_db_options_.info_log.get());
1409
+ for (size_t i = 0; i < column_family_datas.size(); ++i) {
1410
+ const auto* cfd = column_family_datas[i].first;
1411
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
1412
+ "Set options on column family [%s] (%zu/%zu) succeeded, "
1413
+ "updated CF options:",
1414
+ cfd->GetName().c_str(), i, column_family_datas.size());
1415
+ new_options_copy[i].Dump(immutable_db_options_.info_log.get());
1416
+ }
1273
1417
  if (!persist_options_status.ok()) {
1274
1418
  // NOTE: WriteOptionsFile already logs on failure
1275
1419
  s = persist_options_status;
1276
1420
  }
1277
1421
  } else {
1278
1422
  persist_options_status.PermitUncheckedError(); // less important
1279
- ROCKS_LOG_WARN(immutable_db_options_.info_log, "[%s] SetOptions() failed",
1280
- cfd->GetName().c_str());
1423
+ ROCKS_LOG_WARN(immutable_db_options_.info_log, "SetOptions() failed: %s",
1424
+ s.ToString().c_str());
1281
1425
  }
1282
1426
  LogFlush(immutable_db_options_.info_log);
1283
1427
  return s;
@@ -1404,6 +1548,13 @@ Status DBImpl::SetDBOptions(
1404
1548
  table_cache_.get()->SetCapacity(new_options.max_open_files == -1
1405
1549
  ? TableCache::kInfiniteCapacity
1406
1550
  : new_options.max_open_files - 10);
1551
+ // Potential table cache capacity change requires updating if table
1552
+ // handles should get pinned.
1553
+ for (auto cfd : *versions_->GetColumnFamilySet()) {
1554
+ if (!cfd->IsDropped()) {
1555
+ cfd->table_cache()->UpdateShouldPinTableHandles();
1556
+ }
1557
+ }
1407
1558
  wal_other_option_changed = mutable_db_options_.wal_bytes_per_sync !=
1408
1559
  new_options.wal_bytes_per_sync;
1409
1560
  wal_size_option_changed = mutable_db_options_.max_total_wal_size !=
@@ -3858,10 +4009,6 @@ Iterator* DBImpl::NewIterator(const ReadOptions& _read_options,
3858
4009
  read_options.io_activity = Env::IOActivity::kDBIterator;
3859
4010
  }
3860
4011
 
3861
- if (read_options.managed) {
3862
- return NewErrorIterator(
3863
- Status::NotSupported("Managed iterator is not supported anymore."));
3864
- }
3865
4012
  Iterator* result = nullptr;
3866
4013
  if (read_options.read_tier == kPersistedTier) {
3867
4014
  return NewErrorIterator(Status::NotSupported(
@@ -4061,9 +4208,6 @@ Status DBImpl::NewIterators(
4061
4208
  if (read_options.io_activity == Env::IOActivity::kUnknown) {
4062
4209
  read_options.io_activity = Env::IOActivity::kDBIterator;
4063
4210
  }
4064
- if (read_options.managed) {
4065
- return Status::NotSupported("Managed iterator is not supported anymore.");
4066
- }
4067
4211
  if (read_options.read_tier == kPersistedTier) {
4068
4212
  return Status::NotSupported(
4069
4213
  "ReadTier::kPersistedData is not yet supported in iterators.");
@@ -4351,7 +4495,8 @@ void DBImpl::ReleaseSnapshot(const Snapshot* s) {
4351
4495
  for (auto* cfd : *versions_->GetColumnFamilySet()) {
4352
4496
  if (!cfd->AllowIngestBehind()) {
4353
4497
  cfd->current()->storage_info()->UpdateOldestSnapshot(
4354
- oldest_snapshot, /*allow_ingest_behind=*/false);
4498
+ oldest_snapshot, /*allow_ingest_behind=*/false,
4499
+ cfd->ioptions().user_comparator, cfd->GetFullHistoryTsLow());
4355
4500
  if (!cfd->current()
4356
4501
  ->storage_info()
4357
4502
  ->BottommostFilesMarkedForCompaction()
@@ -4987,7 +5132,8 @@ Status DBImpl::DeleteFilesInRanges(ColumnFamilyHandle* column_family,
4987
5132
  }
4988
5133
  if (!deleted_files.empty()) {
4989
5134
  vstorage->ComputeCompactionScore(cfd->ioptions(),
4990
- cfd->GetLatestMutableCFOptions());
5135
+ cfd->GetLatestMutableCFOptions(),
5136
+ cfd->GetFullHistoryTsLow());
4991
5137
  }
4992
5138
  if (edit.GetDeletedFiles().empty()) {
4993
5139
  job_context.Clean();
@@ -6418,8 +6564,11 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
6418
6564
  fmeta->file_checksum_func_name, fname,
6419
6565
  read_options);
6420
6566
  } else {
6567
+ FileOptions fopts = file_options_;
6568
+ fopts.file_checksum = fmeta->file_checksum;
6569
+ fopts.file_checksum_func_name = fmeta->file_checksum_func_name;
6421
6570
  s = ROCKSDB_NAMESPACE::VerifySstFileChecksumInternal(
6422
- opts, file_options_, read_options, fname, fd.largest_seqno);
6571
+ opts, fopts, read_options, fname, fd.largest_seqno);
6423
6572
  }
6424
6573
  RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES,
6425
6574
  IOSTATS(bytes_read) - prev_bytes_read);
@@ -6487,12 +6636,15 @@ Status DBImpl::VerifyFullFileChecksum(const std::string& file_checksum_expected,
6487
6636
  }
6488
6637
  std::string file_checksum;
6489
6638
  std::string func_name;
6639
+ FileOptions fopts;
6640
+ fopts.file_checksum = file_checksum_expected;
6641
+ fopts.file_checksum_func_name = func_name_expected;
6490
6642
  s = ROCKSDB_NAMESPACE::GenerateOneFileChecksum(
6491
6643
  fs_.get(), fname, immutable_db_options_.file_checksum_gen_factory.get(),
6492
6644
  func_name_expected, &file_checksum, &func_name,
6493
6645
  read_options.readahead_size, immutable_db_options_.allow_mmap_reads,
6494
6646
  io_tracer_, immutable_db_options_.rate_limiter.get(), read_options,
6495
- immutable_db_options_.stats, immutable_db_options_.clock);
6647
+ immutable_db_options_.stats, immutable_db_options_.clock, fopts);
6496
6648
  if (s.ok()) {
6497
6649
  assert(func_name_expected == func_name);
6498
6650
  if (file_checksum != file_checksum_expected) {
@@ -6848,10 +7000,20 @@ void DBImpl::TriggerPeriodicCompaction() {
6848
7000
  if (cfd->IsDropped()) {
6849
7001
  continue;
6850
7002
  }
6851
- if (cfd->GetLatestCFOptions().periodic_compaction_seconds &&
6852
- !cfd->queued_for_compaction()) {
7003
+ if (cfd->queued_for_compaction()) {
7004
+ continue;
7005
+ }
7006
+ // Check if this CF has any time-based compaction trigger configured.
7007
+ // This includes periodic_compaction_seconds, ttl, or FIFO temperature
7008
+ // thresholds. Note: periodic_compaction_seconds may be 0 even when
7009
+ // ttl or temperature thresholds are set, due to option sanitization.
7010
+ if (GetMinTimeBasedCompactionInterval(cfd->GetLatestCFOptions()) > 0) {
7011
+ TEST_SYNC_POINT_CALLBACK(
7012
+ "DBImpl::TriggerPeriodicCompaction:BeforeComputeCompactionScore",
7013
+ cfd);
6853
7014
  cfd->current()->storage_info()->ComputeCompactionScore(
6854
- cfd->ioptions(), cfd->GetLatestMutableCFOptions());
7015
+ cfd->ioptions(), cfd->GetLatestMutableCFOptions(),
7016
+ cfd->GetFullHistoryTsLow());
6855
7017
  EnqueuePendingCompaction(cfd);
6856
7018
  if (cfd->queued_for_compaction()) {
6857
7019
  ROCKS_LOG_INFO(immutable_db_options_.info_log,
@@ -455,19 +455,20 @@ class DBImpl : public DB {
455
455
 
456
456
  void EnableManualCompaction() override;
457
457
  void DisableManualCompaction() override;
458
+ void AbortAllCompactions() override;
459
+ void ResumeAllCompactions() override;
458
460
 
459
461
  using DB::SetOptions;
460
462
  Status SetOptions(
461
- ColumnFamilyHandle* column_family,
462
- const std::unordered_map<std::string, std::string>& options_map) override;
463
+ const std::unordered_map<ColumnFamilyHandle*,
464
+ std::unordered_map<std::string, std::string>>&
465
+ column_families_opts_map) override;
463
466
 
464
467
  Status SetDBOptions(
465
468
  const std::unordered_map<std::string, std::string>& options_map) override;
466
469
 
467
470
  using DB::NumberLevels;
468
471
  int NumberLevels(ColumnFamilyHandle* column_family) override;
469
- using DB::MaxMemCompactionLevel;
470
- int MaxMemCompactionLevel(ColumnFamilyHandle* column_family) override;
471
472
  using DB::Level0StopWriteTrigger;
472
473
  int Level0StopWriteTrigger(ColumnFamilyHandle* column_family) override;
473
474
  const std::string& GetName() const override;
@@ -1247,6 +1248,7 @@ class DBImpl : public DB {
1247
1248
 
1248
1249
  int TEST_BGCompactionsAllowed() const;
1249
1250
  int TEST_BGFlushesAllowed() const;
1251
+ int TEST_NumRunningBottomCompactions() const;
1250
1252
  size_t TEST_GetWalPreallocateBlockSize(uint64_t write_buffer_size) const;
1251
1253
  void TEST_WaitForPeriodicTaskRun(std::function<void()> callback) const;
1252
1254
  SeqnoToTimeMapping TEST_GetSeqnoToTimeMapping() const;
@@ -1717,6 +1719,17 @@ class DBImpl : public DB {
1717
1719
  // recovery.
1718
1720
  Status LogAndApplyForRecovery(const RecoveryContext& recovery_ctx);
1719
1721
 
1722
+ // Schedule background work to open and validate SST files asynchronously.
1723
+ // Called when open_files_async is enabled.
1724
+ void ScheduleAsyncFileOpening();
1725
+
1726
+ // Mark async file opening as not needed. Used by subclasses that load
1727
+ // table files through a different mechanism (e.g., ReactiveVersionSet).
1728
+ void MarkAsyncFileOpenNotNeeded();
1729
+
1730
+ // Background work function for async file opening.
1731
+ static void BGWorkAsyncFileOpen(void* arg);
1732
+
1720
1733
  void InvokeWalFilterIfNeededOnColumnFamilyToWalNumberMap();
1721
1734
 
1722
1735
  // Return true to proceed with current WAL record whose content is stored in
@@ -1727,8 +1740,12 @@ class DBImpl : public DB {
1727
1740
  Status& status, bool& stop_replay,
1728
1741
  WriteBatch& batch);
1729
1742
 
1743
+ // Indicate DB was opened successfully
1744
+ bool opened_successfully_ = false;
1745
+
1730
1746
  private:
1731
1747
  friend class DB;
1748
+ friend class DBImplSecondary;
1732
1749
  friend class ErrorHandler;
1733
1750
  friend class InternalStats;
1734
1751
  friend class PessimisticTransaction;
@@ -2149,7 +2166,7 @@ class DBImpl : public DB {
2149
2166
  Status InsertLogRecordToMemtable(WriteBatch* batch_to_use,
2150
2167
  uint64_t wal_number,
2151
2168
  SequenceNumber* next_sequence,
2152
- bool* has_valid_writes);
2169
+ bool* has_valid_writes, bool read_only);
2153
2170
 
2154
2171
  Status MaybeWriteLevel0TableForRecovery(
2155
2172
  bool has_valid_writes, bool read_only, uint64_t wal_number, int job_id,
@@ -2415,6 +2432,9 @@ class DBImpl : public DB {
2415
2432
 
2416
2433
  void MaybeScheduleFlushOrCompaction();
2417
2434
 
2435
+ BackgroundJobPressure CaptureBackgroundJobPressure() const;
2436
+ void NotifyOnBackgroundJobPressureChanged();
2437
+
2418
2438
  struct FlushRequest {
2419
2439
  FlushReason flush_reason;
2420
2440
  // A map from column family to flush to largest memtable id to persist for
@@ -2505,6 +2525,13 @@ class DBImpl : public DB {
2505
2525
  // Schedule background tasks
2506
2526
  Status StartPeriodicTaskScheduler();
2507
2527
 
2528
+ // Compute the repeat period for the kTriggerCompaction task, which ensures
2529
+ // compactions not dependent on writes (flushes) are eventually triggered when
2530
+ // there are no writes (flushes). NOT thread safe; only called during DB open
2531
+ // (StartPeriodicTaskScheduler). KNOWN LIMITATION: doesn't get updated with
2532
+ // dynamic option updates. (Probably not worth the extra complexity.)
2533
+ uint64_t ComputeTriggerCompactionPeriod();
2534
+
2508
2535
  // Cancel scheduled periodic tasks
2509
2536
  Status CancelPeriodicTaskScheduler();
2510
2537
 
@@ -2788,6 +2815,14 @@ class DBImpl : public DB {
2788
2815
  // compaction code paths.
2789
2816
  std::atomic<int> manual_compaction_paused_ = false;
2790
2817
 
2818
+ // If non-zero, all compaction jobs (background automatic compactions,
2819
+ // manual compactions via CompactRange, and foreground CompactFiles calls)
2820
+ // are being aborted. Compactions will be signaled to stop. Any new
2821
+ // compaction job would fail immediately. The value indicates how many threads
2822
+ // have called AbortAllCompactions(). It is accessed in read mode outside the
2823
+ // DB mutex in compaction code paths.
2824
+ std::atomic<int> compaction_aborted_ = 0;
2825
+
2791
2826
  // This condition variable is signaled on these conditions:
2792
2827
  // * whenever bg_compaction_scheduled_ goes down to 0
2793
2828
  // * if AnyManualCompaction, whenever a compaction finishes, even if it hasn't
@@ -3042,6 +3077,9 @@ class DBImpl : public DB {
3042
3077
  // stores the number of compactions are currently running
3043
3078
  int num_running_compactions_ = 0;
3044
3079
 
3080
+ // stores the number of BOTTOM-priority compactions currently running
3081
+ int num_running_bottom_compactions_ = 0;
3082
+
3045
3083
  // number of background memtable flush jobs, submitted to the HIGH pool
3046
3084
  int bg_flush_scheduled_ = 0;
3047
3085
 
@@ -3051,6 +3089,19 @@ class DBImpl : public DB {
3051
3089
  // number of background obsolete file purge jobs, submitted to the HIGH pool
3052
3090
  int bg_purge_scheduled_ = 0;
3053
3091
 
3092
+ // number of pressure callbacks currently in progress (for destructor safety)
3093
+ int bg_pressure_callback_in_progress_ = 0;
3094
+
3095
+ enum class AsyncFileOpenState : uint8_t {
3096
+ kNotScheduled = 0, // Async file opening has not been scheduled.
3097
+ kScheduled, // Async file opening is in-flight in the HIGH pool.
3098
+ kComplete, // Async file opening has finished (or was not needed).
3099
+ };
3100
+
3101
+ // Tracks whether background async file opening has been scheduled/completed.
3102
+ AsyncFileOpenState bg_async_file_open_state_ =
3103
+ AsyncFileOpenState::kNotScheduled;
3104
+
3054
3105
  std::deque<ManualCompactionState*> manual_compaction_dequeue_;
3055
3106
 
3056
3107
  // shall we disable deletion of obsolete files
@@ -3106,9 +3157,6 @@ class DBImpl : public DB {
3106
3157
  // Guard against multiple concurrent refitting
3107
3158
  bool refitting_level_ = false;
3108
3159
 
3109
- // Indicate DB was opened successfully
3110
- bool opened_successfully_ = false;
3111
-
3112
3160
  // The min threshold to triggere bottommost compaction for removing
3113
3161
  // garbages, among all column families.
3114
3162
  SequenceNumber bottommost_files_mark_threshold_ = kMaxSequenceNumber;
@@ -3176,7 +3224,7 @@ class DBImpl : public DB {
3176
3224
  // installed to MANIFEST first.
3177
3225
  InstrumentedCondVar atomic_flush_install_cv_;
3178
3226
 
3179
- bool wal_in_db_path_;
3227
+ bool wal_in_db_path_ = false;
3180
3228
  std::atomic<uint64_t> max_total_wal_size_;
3181
3229
 
3182
3230
  BlobFileCompletionCallback blob_callback_;