@nxtedition/rocksdb 15.4.0 → 15.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (402)
  1. package/binding.cc +24 -19
  2. package/cache.js +1 -1
  3. package/chained-batch.js +12 -3
  4. package/deps/rocksdb/rocksdb/.clang-tidy +86 -0
  5. package/deps/rocksdb/rocksdb/BUCK +42 -0
  6. package/deps/rocksdb/rocksdb/CMakeLists.txt +11 -0
  7. package/deps/rocksdb/rocksdb/Makefile +59 -32
  8. package/deps/rocksdb/rocksdb/cache/cache.cc +0 -5
  9. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +9 -9
  10. package/deps/rocksdb/rocksdb/cache/cache_key.cc +3 -3
  11. package/deps/rocksdb/rocksdb/cache/cache_key.h +5 -5
  12. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +16 -16
  13. package/deps/rocksdb/rocksdb/cache/cache_test.cc +1 -1
  14. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +258 -294
  15. package/deps/rocksdb/rocksdb/cache/clock_cache.h +98 -49
  16. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -5
  17. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +2 -3
  18. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +18 -18
  19. package/deps/rocksdb/rocksdb/crash_test.mk +5 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -22
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +6 -1
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +14 -16
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +38 -26
  24. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
  25. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +101 -18
  26. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +12 -0
  27. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +6 -9
  28. package/deps/rocksdb/rocksdb/db/builder.cc +23 -0
  29. package/deps/rocksdb/rocksdb/db/builder.h +7 -0
  30. package/deps/rocksdb/rocksdb/db/c.cc +373 -57
  31. package/deps/rocksdb/rocksdb/db/c_test.c +101 -1
  32. package/deps/rocksdb/rocksdb/db/column_family.cc +31 -3
  33. package/deps/rocksdb/rocksdb/db/column_family_test.cc +10 -13
  34. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +35 -48
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +13 -5
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +201 -39
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -10
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +7 -7
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -455
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +19 -0
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +72 -9
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -10
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +405 -83
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +25 -1
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -10
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -0
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1410 -106
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -5
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +2 -1
  51. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +19 -10
  52. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +505 -45
  53. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
  54. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +9 -1
  55. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +4 -4
  56. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +7 -9
  57. package/deps/rocksdb/rocksdb/db/convenience.cc +4 -4
  58. package/deps/rocksdb/rocksdb/db/convenience_impl.h +2 -1
  59. package/deps/rocksdb/rocksdb/db/corruption_test.cc +60 -88
  60. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +10 -12
  61. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +471 -40
  62. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +116 -2
  63. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +5 -15
  64. package/deps/rocksdb/rocksdb/db/db_compaction_abort_test.cc +993 -0
  65. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +329 -29
  66. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +155 -13
  67. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +54 -31
  68. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -0
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +232 -70
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +57 -9
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +224 -31
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +4 -2
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +164 -8
  77. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -0
  78. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +5 -0
  79. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +47 -35
  80. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +22 -9
  81. package/deps/rocksdb/rocksdb/db/db_iter.cc +9 -0
  82. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +371 -6
  83. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -5
  84. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +22 -23
  85. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +0 -2
  86. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +4 -4
  87. package/deps/rocksdb/rocksdb/db/db_options_test.cc +40 -0
  88. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +32 -13
  89. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1 -1
  90. package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +4 -4
  91. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +68 -15
  92. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1 -1
  93. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +2 -3
  94. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -21
  95. package/deps/rocksdb/rocksdb/db/db_test.cc +644 -128
  96. package/deps/rocksdb/rocksdb/db/db_test2.cc +198 -81
  97. package/deps/rocksdb/rocksdb/db/db_test_util.cc +35 -10
  98. package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -2
  99. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +36 -32
  100. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +11 -7
  101. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +499 -0
  102. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +284 -20
  103. package/deps/rocksdb/rocksdb/db/db_write_test.cc +3 -3
  104. package/deps/rocksdb/rocksdb/db/dbformat.h +0 -5
  105. package/deps/rocksdb/rocksdb/db/error_handler.cc +24 -0
  106. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +12 -14
  107. package/deps/rocksdb/rocksdb/db/experimental.cc +13 -10
  108. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +22 -3
  110. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +21 -15
  111. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +4 -6
  112. package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -3
  113. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +5 -6
  114. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +4 -2
  115. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +17 -17
  116. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -0
  117. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  118. package/deps/rocksdb/rocksdb/db/listener_test.cc +154 -27
  119. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -6
  120. package/deps/rocksdb/rocksdb/db/memtable.cc +197 -51
  121. package/deps/rocksdb/rocksdb/db/memtable.h +6 -0
  122. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +3 -4
  123. package/deps/rocksdb/rocksdb/db/merge_test.cc +37 -35
  124. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +2 -1
  125. package/deps/rocksdb/rocksdb/db/options_file_test.cc +4 -4
  126. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +9 -11
  127. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +10 -1
  128. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +292 -15
  129. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +10 -17
  130. package/deps/rocksdb/rocksdb/db/prefix_test.cc +6 -8
  131. package/deps/rocksdb/rocksdb/db/repair.cc +10 -10
  132. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -5
  133. package/deps/rocksdb/rocksdb/db/table_cache.cc +142 -135
  134. package/deps/rocksdb/rocksdb/db/table_cache.h +30 -6
  135. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -7
  136. package/deps/rocksdb/rocksdb/db/version_builder.cc +11 -50
  137. package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
  138. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +2 -1
  139. package/deps/rocksdb/rocksdb/db/version_edit.cc +51 -2
  140. package/deps/rocksdb/rocksdb/db/version_edit.h +91 -29
  141. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -7
  142. package/deps/rocksdb/rocksdb/db/version_set.cc +211 -50
  143. package/deps/rocksdb/rocksdb/db/version_set.h +40 -3
  144. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
  145. package/deps/rocksdb/rocksdb/db/version_set_test.cc +294 -21
  146. package/deps/rocksdb/rocksdb/db/version_util.cc +96 -0
  147. package/deps/rocksdb/rocksdb/db/version_util.h +24 -0
  148. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +5 -5
  149. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +647 -31
  150. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +219 -1
  151. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +549 -12
  152. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -3
  153. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +1 -1
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -0
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +21 -4
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +32 -0
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +74 -22
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +9 -0
  159. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -61
  160. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -2
  161. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +76 -2
  162. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +92 -72
  163. package/deps/rocksdb/rocksdb/env/env.cc +1 -0
  164. package/deps/rocksdb/rocksdb/env/env_test.cc +365 -2
  165. package/deps/rocksdb/rocksdb/env/fs_posix.cc +31 -30
  166. package/deps/rocksdb/rocksdb/env/io_posix.cc +8 -11
  167. package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
  168. package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
  169. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -1
  170. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +108 -0
  171. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +32 -4
  172. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +4 -4
  173. package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
  174. package/deps/rocksdb/rocksdb/file/file_util.h +2 -1
  175. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +331 -12
  176. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +52 -35
  177. package/deps/rocksdb/rocksdb/folly.mk +22 -5
  178. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +1 -1
  179. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +100 -54
  180. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +67 -2
  181. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +149 -13
  182. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -12
  183. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +78 -97
  184. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +3 -3
  185. package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +2 -2
  186. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +5 -0
  187. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +17 -2
  188. package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +1 -1
  189. package/deps/rocksdb/rocksdb/include/rocksdb/io_dispatcher.h +358 -0
  190. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +13 -0
  191. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +43 -0
  192. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +20 -0
  193. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +63 -21
  194. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +10 -1
  195. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +1 -1
  196. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +2 -7
  197. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +13 -0
  198. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -14
  199. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +49 -9
  200. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +8 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +77 -6
  202. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +15 -0
  203. package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +16 -10
  204. package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +5 -5
  205. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +2 -4
  206. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +106 -46
  207. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +1 -1
  208. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +14 -1
  209. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +5 -1
  210. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +2 -1
  211. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -9
  212. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  213. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -2
  214. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +2 -2
  215. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +226 -8
  216. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +490 -0
  217. package/deps/rocksdb/rocksdb/memtable/skiplist.h +3 -3
  218. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +11 -0
  219. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +4 -12
  220. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +5 -5
  221. package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +21 -4
  222. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +9 -3
  223. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +21 -2
  224. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +2 -2
  225. package/deps/rocksdb/rocksdb/options/cf_options.cc +21 -1
  226. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  227. package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -2
  228. package/deps/rocksdb/rocksdb/options/db_options.cc +26 -5
  229. package/deps/rocksdb/rocksdb/options/db_options.h +3 -1
  230. package/deps/rocksdb/rocksdb/options/options.cc +5 -1
  231. package/deps/rocksdb/rocksdb/options/options_helper.cc +7 -2
  232. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +109 -103
  233. package/deps/rocksdb/rocksdb/options/options_test.cc +14 -0
  234. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +15 -17
  235. package/deps/rocksdb/rocksdb/port/lang.h +4 -0
  236. package/deps/rocksdb/rocksdb/port/port_example.h +0 -23
  237. package/deps/rocksdb/rocksdb/port/stack_trace.cc +36 -0
  238. package/deps/rocksdb/rocksdb/port/stack_trace.h +9 -0
  239. package/deps/rocksdb/rocksdb/src.mk +12 -0
  240. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +1 -2
  241. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  242. package/deps/rocksdb/rocksdb/table/block_based/block.cc +571 -292
  243. package/deps/rocksdb/rocksdb/table/block_based/block.h +143 -53
  244. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +154 -90
  245. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +5 -1
  246. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +51 -14
  247. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +0 -2
  248. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +147 -734
  249. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +30 -233
  250. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +178 -108
  251. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +13 -0
  252. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +17 -4
  253. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -2
  254. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +70 -0
  255. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +168 -24
  256. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -9
  257. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +7 -4
  258. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +9 -2
  259. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +548 -169
  260. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
  261. package/deps/rocksdb/rocksdb/table/block_based/block_util.h +156 -0
  262. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +73 -30
  263. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +74 -7
  264. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +1 -1
  265. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +20 -14
  266. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +22 -12
  267. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +1 -1
  268. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.cc +332 -0
  269. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.h +133 -0
  270. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -2
  271. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
  272. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +3 -2
  273. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +4 -1
  274. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +0 -1
  275. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +126 -46
  276. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +31 -3
  277. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +1 -2
  278. package/deps/rocksdb/rocksdb/table/cleanable_test.cc +3 -1
  279. package/deps/rocksdb/rocksdb/table/external_table.cc +25 -4
  280. package/deps/rocksdb/rocksdb/table/format.cc +27 -15
  281. package/deps/rocksdb/rocksdb/table/format.h +41 -15
  282. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -0
  283. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +22 -12
  284. package/deps/rocksdb/rocksdb/table/meta_blocks.h +0 -1
  285. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +7 -21
  286. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
  287. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +88 -13
  288. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +53 -42
  289. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +3 -12
  290. package/deps/rocksdb/rocksdb/table/table_builder.h +0 -4
  291. package/deps/rocksdb/rocksdb/table/table_properties.cc +18 -0
  292. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -3
  293. package/deps/rocksdb/rocksdb/table/table_test.cc +848 -172
  294. package/deps/rocksdb/rocksdb/table/unique_id.cc +24 -20
  295. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +8 -8
  296. package/deps/rocksdb/rocksdb/test_util/sync_point.h +5 -4
  297. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -1
  298. package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -2
  299. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +2 -1
  300. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +238 -120
  301. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +2 -2
  302. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +2 -4
  303. package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +4 -8
  304. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +1 -1
  305. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +2 -3
  306. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +82 -20
  307. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +41 -47
  308. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -0
  309. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +5 -6
  310. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +1 -1
  311. package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +6 -5
  312. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +4 -4
  313. package/deps/rocksdb/rocksdb/tools/write_stress.cc +1 -3
  314. package/deps/rocksdb/rocksdb/util/atomic.h +30 -23
  315. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +6 -7
  316. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +3 -3
  317. package/deps/rocksdb/rocksdb/util/bit_fields.h +68 -46
  318. package/deps/rocksdb/rocksdb/util/bloom_impl.h +16 -16
  319. package/deps/rocksdb/rocksdb/util/coding.h +14 -27
  320. package/deps/rocksdb/rocksdb/util/compression.cc +365 -207
  321. package/deps/rocksdb/rocksdb/util/compression.h +16 -1298
  322. package/deps/rocksdb/rocksdb/util/compression_test.cc +347 -61
  323. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +8 -9
  324. package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +1 -1
  325. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +1 -1
  326. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +3 -3
  327. package/deps/rocksdb/rocksdb/util/filter_bench.cc +18 -18
  328. package/deps/rocksdb/rocksdb/util/gflags_compat.h +3 -3
  329. package/deps/rocksdb/rocksdb/util/hash_test.cc +19 -7
  330. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.cc +1099 -0
  331. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.h +36 -0
  332. package/deps/rocksdb/rocksdb/util/io_dispatcher_test.cc +1919 -0
  333. package/deps/rocksdb/rocksdb/util/math.h +3 -1
  334. package/deps/rocksdb/rocksdb/util/mutexlock.h +19 -19
  335. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +25 -25
  336. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +5 -7
  337. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -5
  338. package/deps/rocksdb/rocksdb/util/slice.cc +0 -10
  339. package/deps/rocksdb/rocksdb/util/slice_test.cc +35 -1
  340. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +5 -7
  341. package/deps/rocksdb/rocksdb/util/status.cc +3 -1
  342. package/deps/rocksdb/rocksdb/util/stop_watch.h +2 -0
  343. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -1
  344. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +123 -78
  345. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +12 -93
  346. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -4
  347. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +0 -21
  348. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +6 -48
  349. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +94 -307
  350. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +12 -58
  351. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +2 -8
  352. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +2 -3
  353. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +205 -811
  354. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +18 -9
  355. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +2 -7
  356. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +1 -9
  357. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +17 -11
  358. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +1 -1
  359. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +1 -1
  360. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
  361. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +68 -61
  362. package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -1
  363. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +105 -59
  364. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -7
  365. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs_test.cc +94 -0
  366. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +13 -17
  367. package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +16 -3
  368. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +25 -25
  369. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +40 -40
  370. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +2 -5
  371. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +17 -19
  372. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +2 -2
  373. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +2 -2
  374. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +1 -1
  375. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +2 -2
  376. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +4 -13
  377. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +3 -3
  378. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
  379. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_seqno_test.cc +431 -0
  380. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -2
  381. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +91 -0
  382. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.cc +562 -0
  383. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.h +615 -0
  384. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.cc +2575 -0
  385. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.h +685 -0
  386. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_db_test.cc +2843 -0
  387. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.cc +567 -0
  388. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.h +275 -0
  389. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_test.cc +5183 -0
  390. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +4 -3
  391. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  392. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
  393. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +3 -3
  394. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +93 -88
  395. package/deps/rocksdb/rocksdb.gyp +7 -0
  396. package/index.js +11 -2
  397. package/iterator.js +15 -7
  398. package/package.json +1 -1
  399. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  400. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  401. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +0 -43
  402. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +0 -55
@@ -106,7 +106,8 @@ struct PerfContextBase {
106
106
  uint64_t compressed_sec_cache_compressed_bytes;
107
107
 
108
108
  uint64_t block_checksum_time; // total nanos spent on block checksum
109
- uint64_t block_decompress_time; // total nanos spent on block decompression
109
+ uint64_t block_decompress_time; // total nanos spent on block decompression
110
+ uint64_t block_decompress_count; // total number of block decompressions
110
111
 
111
112
  uint64_t get_read_bytes; // bytes for vals returned by Get
112
113
  uint64_t multiget_read_bytes; // bytes for vals returned by MultiGet
@@ -290,6 +291,14 @@ struct PerfContextBase {
290
291
  uint64_t file_ingestion_nanos;
291
292
  // Time IngestExternalFile blocked live writes.
292
293
  uint64_t file_ingestion_blocking_live_writes_nanos;
294
+
295
+ // Bytes read from storage for each block category. These add up to
296
+ // block_read_byte.
297
+ uint64_t data_block_read_byte;
298
+ uint64_t index_block_read_byte;
299
+ uint64_t filter_block_read_byte;
300
+ uint64_t compression_dict_block_read_byte;
301
+ uint64_t metadata_block_read_byte;
293
302
  };
294
303
 
295
304
  struct PerfContext : public PerfContextBase {
@@ -132,7 +132,7 @@ class RateLimiter {
132
132
  }
133
133
 
134
134
  protected:
135
- Mode GetMode() { return mode_; }
135
+ Mode GetMode() const { return mode_; }
136
136
 
137
137
  private:
138
138
  const Mode mode_;
@@ -8,9 +8,8 @@
8
8
  //
9
9
  // Class for specifying user-defined functions which perform a
10
10
  // transformation on a slice. It is not required that every slice
11
- // belong to the domain and/or range of a function. Subclasses should
12
- // define InDomain and InRange to determine which slices are in either
13
- // of these sets respectively.
11
+ // belong to the domain of a function. Subclasses should
12
+ // define InDomain to determine which slices are in this set.
14
13
 
15
14
  #pragma once
16
15
 
@@ -70,10 +69,6 @@ class SliceTransform : public Customizable {
70
69
  //
71
70
  virtual bool InDomain(const Slice& key) const = 0;
72
71
 
73
- // DEPRECATED: This is currently not used and remains here for backward
74
- // compatibility.
75
- virtual bool InRange(const Slice& /*dst*/) const { return false; }
76
-
77
72
  // Returns information on maximum prefix length, if there is one.
78
73
  // If Transform(x).size() == n for some keys and otherwise < n,
79
74
  // should return true and set *len = n. Returning false is safe but
@@ -35,6 +35,19 @@ class SstFileReader {
35
35
  const std::vector<Slice>& keys,
36
36
  std::vector<std::string>* values);
37
37
 
38
+ // MultiGet variant that returns PinnableSlice values, enabling zero-copy
39
+ // when the underlying TableReader supports pinning.
40
+ std::vector<Status> MultiGet(const ReadOptions& options,
41
+ const std::vector<Slice>& keys,
42
+ std::vector<PinnableSlice>* values);
43
+
44
+ // Point lookup a single key from the SST.
45
+ Status Get(const ReadOptions& options, const Slice& key, std::string* value);
46
+
47
+ // Point lookup variant that returns a PinnableSlice.
48
+ Status Get(const ReadOptions& options, const Slice& key,
49
+ PinnableSlice* value);
50
+
38
51
  // Returns a new iterator over the table contents as a raw table iterator,
39
52
  // a.k.a. a `TableIterator` that iterates all point data entries in the table
40
53
  // including logically invisible entries like delete entries.
@@ -82,24 +82,19 @@ class SstFileWriter {
82
82
  // hint that this file's pages are not needed every time we write 1MB to the
83
83
  // file. To use the rate limiter an io_priority smaller than IO_TOTAL can be
84
84
  // passed.
85
- // The `skip_filters` option is DEPRECATED and could be removed in the
86
- // future. Use `BlockBasedTableOptions::filter_policy` to control filter
87
- // generation.
88
85
  SstFileWriter(const EnvOptions& env_options, const Options& options,
89
86
  ColumnFamilyHandle* column_family = nullptr,
90
87
  bool invalidate_page_cache = true,
91
- Env::IOPriority io_priority = Env::IOPriority::IO_TOTAL,
92
- bool skip_filters = false)
88
+ Env::IOPriority io_priority = Env::IOPriority::IO_TOTAL)
93
89
  : SstFileWriter(env_options, options, options.comparator, column_family,
94
- invalidate_page_cache, io_priority, skip_filters) {}
90
+ invalidate_page_cache, io_priority) {}
95
91
 
96
92
  // Deprecated API
97
93
  SstFileWriter(const EnvOptions& env_options, const Options& options,
98
94
  const Comparator* user_comparator,
99
95
  ColumnFamilyHandle* column_family = nullptr,
100
96
  bool invalidate_page_cache = true,
101
- Env::IOPriority io_priority = Env::IOPriority::IO_TOTAL,
102
- bool skip_filters = false);
97
+ Env::IOPriority io_priority = Env::IOPriority::IO_TOTAL);
103
98
 
104
99
  ~SstFileWriter();
105
100
 
@@ -107,12 +102,6 @@ class SstFileWriter {
107
102
  Status Open(const std::string& file_path,
108
103
  Temperature temp = Temperature::kUnknown);
109
104
 
110
- // Add a Put key with value to currently opened file (deprecated)
111
- // REQUIRES: user_key is after any previously added point (Put/Merge/Delete)
112
- // key according to the comparator.
113
- // REQUIRES: comparator is *not* timestamp-aware.
114
- [[deprecated]] Status Add(const Slice& user_key, const Slice& value);
115
-
116
105
  // Add a Put key with value to currently opened file
117
106
  // REQUIRES: user_key is after any previously added point (Put/Merge/Delete)
118
107
  // key according to the comparator.
@@ -162,6 +162,8 @@ enum Tickers : uint32_t {
162
162
  COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE,
163
163
  // If a compaction was canceled in sfm to prevent ENOSPC
164
164
  COMPACTION_CANCELLED,
165
+ // Number of compactions aborted via AbortAllCompactions()
166
+ COMPACTION_ABORTED,
165
167
 
166
168
  // Number of keys written to the database via the Put and Write calls
167
169
  NUMBER_KEYS_WRITTEN,
@@ -301,7 +303,7 @@ enum Tickers : uint32_t {
301
303
  NUMBER_RATE_LIMITER_DRAINS,
302
304
 
303
305
  // BlobDB specific stats
304
- // # of Put/PutTTL/PutUntil to BlobDB. Only applicable to legacy BlobDB.
306
+ // # of Put/PutWithTTL to BlobDB. Only applicable to legacy BlobDB.
305
307
  BLOB_DB_NUM_PUT,
306
308
  // # of Write to BlobDB. Only applicable to legacy BlobDB.
307
309
  BLOB_DB_NUM_WRITE,
@@ -326,12 +328,12 @@ enum Tickers : uint32_t {
326
328
  // # of bytes (keys + value) read from BlobDB. Only applicable to legacy
327
329
  // BlobDB.
328
330
  BLOB_DB_BYTES_READ,
329
- // # of keys written by BlobDB as non-TTL inlined value. Only applicable to
330
- // legacy BlobDB.
331
- BLOB_DB_WRITE_INLINED,
332
- // # of keys written by BlobDB as TTL inlined value. Only applicable to legacy
333
- // BlobDB.
334
- BLOB_DB_WRITE_INLINED_TTL,
331
+ // Deprecated: min_blob_size is no longer configurable. Retained to avoid
332
+ // shifting enum values.
333
+ BLOB_DB_WRITE_INLINED_DEPRECATED,
334
+ // Deprecated: min_blob_size is no longer configurable. Retained to avoid
335
+ // shifting enum values.
336
+ BLOB_DB_WRITE_INLINED_TTL_DEPRECATED,
335
337
  // # of keys written by BlobDB as non-TTL blob value. Only applicable to
336
338
  // legacy BlobDB.
337
339
  BLOB_DB_WRITE_BLOB,
@@ -552,6 +554,35 @@ enum Tickers : uint32_t {
552
554
  // Failure to load the UDI during SST table open
553
555
  SST_USER_DEFINED_INDEX_LOAD_FAIL_COUNT,
554
556
 
557
+ // MultiScan statistics
558
+ // # of Prepare() calls
559
+ MULTISCAN_PREPARE_CALLS,
560
+ // # of Prepare() calls that failed
561
+ MULTISCAN_PREPARE_ERRORS,
562
+ // # of data blocks prefetched from storage during MultiScan
563
+ MULTISCAN_BLOCKS_PREFETCHED,
564
+ // # of blocks found already in cache during MultiScan Prepare
565
+ MULTISCAN_BLOCKS_FROM_CACHE,
566
+ // Total bytes prefetched during MultiScan
567
+ MULTISCAN_PREFETCH_BYTES,
568
+ // # of prefetched blocks that were never accessed
569
+ MULTISCAN_PREFETCH_BLOCKS_WASTED,
570
+ // # of actual I/O requests issued during MultiScan
571
+ MULTISCAN_IO_REQUESTS,
572
+ // # of non-adjacent blocks coalesced into single I/O (within
573
+ // io_coalesce_threshold)
574
+ MULTISCAN_IO_COALESCED_NONADJACENT,
575
+ // # of seeks that failed validation (out of order, etc.)
576
+ MULTISCAN_SEEK_ERRORS,
577
+
578
+ // IODispatcher memory limiting statistics
579
+ // # of bytes granted to prefetch requests
580
+ PREFETCH_MEMORY_BYTES_GRANTED,
581
+ // # of bytes released from prefetch memory
582
+ PREFETCH_MEMORY_BYTES_RELEASED,
583
+ // # of prefetch requests that were blocked waiting for memory
584
+ PREFETCH_MEMORY_REQUESTS_BLOCKED,
585
+
555
586
  TICKER_ENUM_MAX
556
587
  };
557
588
 
@@ -630,8 +661,7 @@ enum Histograms : uint32_t {
630
661
  BLOB_DB_KEY_SIZE,
631
662
  // Size of values written to BlobDB. Only applicable to legacy BlobDB.
632
663
  BLOB_DB_VALUE_SIZE,
633
- // BlobDB Put/PutWithTTL/PutUntil/Write latency. Only applicable to legacy
634
- // BlobDB.
664
+ // BlobDB Put/PutWithTTL/Write latency. Only applicable to legacy BlobDB.
635
665
  BLOB_DB_WRITE_MICROS,
636
666
  // BlobDB Get latency. Only applicable to legacy BlobDB.
637
667
  BLOB_DB_GET_MICROS,
@@ -695,6 +725,16 @@ enum Histograms : uint32_t {
695
725
  // MultiScan Prefill iterator Prepare cost
696
726
  MULTISCAN_PREPARE_ITERATORS,
697
727
 
728
+ // Total Prepare() latency for MultiScan
729
+ MULTISCAN_PREPARE_MICROS,
730
+ // Distribution of blocks prefetched per MultiScan Prepare()
731
+ MULTISCAN_BLOCKS_PER_PREPARE,
732
+
733
+ // Coefficient of variation of key gaps in blocks, scaled by 10000
734
+ // (e.g., CV of 0.4532 is recorded as 4532). Currently only used by index
735
+ // blocks for uniform key distribution tracking.
736
+ BLOCK_KEY_DISTRIBUTION_CV,
737
+
698
738
  HISTOGRAM_ENUM_MAX
699
739
  };
700
740
 
@@ -117,6 +117,7 @@ class Status {
117
117
  kMergeOperandThresholdExceeded = 16,
118
118
  kPrefetchLimitReached = 17,
119
119
  kNotExpectedCodePath = 18,
120
+ kCompactionAborted = 19,
120
121
  kMaxSubCode
121
122
  };
122
123
 
@@ -483,6 +484,13 @@ class Status {
483
484
  return (code() == kIncomplete) && (subcode() == kManualCompactionPaused);
484
485
  }
485
486
 
487
+ // Returns true iff the status indicates compaction aborted. This
488
+ // is caused by a call to AbortAllCompactions
489
+ bool IsCompactionAborted() const {
490
+ MarkChecked();
491
+ return (code() == kIncomplete) && (subcode() == kCompactionAborted);
492
+ }
493
+
486
494
  // Returns true iff the status indicates a TxnNotPrepared error.
487
495
  bool IsTxnNotPrepared() const {
488
496
  MarkChecked();
@@ -263,6 +263,34 @@ struct BlockBasedTableOptions {
263
263
 
264
264
  IndexType index_type = kBinarySearch;
265
265
 
266
+ // The search algorithm used when seeking to entries in the index block.
267
+ //
268
+ // Note: This option is only used at read time and is compatible with any type
269
+ // of block.
270
+ enum BlockSearchType : char {
271
+ // Standard binary search
272
+ kBinary = 0x00,
273
+ // Interpolation search, which may be better suited for uniformly
274
+ // distributed keys. This will only be applicable if the comparator is the
275
+ // byte-wise comparator. Avoid using
276
+ // IndexShorteningMode::kShortenSeparatorsAndSuccessor as shortening the
277
+ // successor can skew the end key and make interpolation search significantly
278
+ // less performant.
279
+ kInterpolation = 0x01,
280
+ // See `uniform_cv_threshold`. On the write path if `uniform_cv_threshold`
281
+ // >= 0, then it is possible for a block to be marked as "is_uniform=true"
282
+ // in the block footer via bit flag. On files from older versions or
283
+ // produced via `uniform_cv_threshold` < 0, blocks are always marked as
284
+ // "is_uniform=false".
285
+ //
286
+ // When kAuto is used, the search algorithm will use interpolation search if
287
+ // "is_uniform" flag is set in the block footer, otherwise it will use
288
+ // binary search.
289
+ kAuto = 0x02,
290
+ };
291
+
292
+ BlockSearchType index_block_search_type = kBinary;
293
+
266
294
  // The index type that will be used for the data block.
267
295
  enum DataBlockIndexType : char {
268
296
  kDataBlockBinarySearch = 0, // traditional block type
@@ -558,8 +586,9 @@ struct BlockBasedTableOptions {
558
586
  uint32_t read_amp_bytes_per_bit = 0;
559
587
 
560
588
  // We currently have these format versions:
561
- // 0 - 1 -- Unsupported for writing new files and quietly sanitized to 2.
562
- // Read support is deprecated and could be removed in the future.
589
+ // 0 - 1 -- No longer supported. Attempting to read files with these format
590
+ // versions will return an error. To upgrade, load the data with RocksDB
591
+ // >= 4.6.0 and < 11.0.0, then run a full compaction.
563
592
  // 2 -- Can be read by RocksDB's versions since 3.10. Changes the way we
564
593
  // encode compressed blocks with LZ4, BZip2 and Zlib compression. If you
565
594
  // don't plan to run RocksDB before version 3.10, you should probably use
@@ -593,7 +622,32 @@ struct BlockBasedTableOptions {
593
622
  // validation and sufficient time and number of releases have elapsed
594
623
  // (6 months recommended) to ensure a clean downgrade/revert path for users
595
624
  // who might only upgrade a few times per year.
596
- uint32_t format_version = 6;
625
+ uint32_t format_version = 7;
626
+
627
+ // When true, data blocks store keys and values separately. Keys are stored
628
+ // at the beginning of the block, followed by values at the end. This can
629
+ // improve read performance at a cost of a varint per restart interval (~1 bit
630
+ // per key by default), in addition to improving compression. Small values or
631
+ // low block_restart_interval may prefer to set this as false.
632
+ //
633
+ // Default: false
634
+ bool separate_key_value_in_data_block = false;
635
+
636
+ // Coefficient of variation (CV) threshold used to determine if keys in an
637
+ // index block are uniformly distributed. Lower CV means more "uniform", and
638
+ // the more likely interpolation search will outperform binary search.
639
+ //
640
+ // On the write path, if the CV of key gaps in an index
641
+ // block is less than this threshold, the "is_uniform" hint is set in that
642
+ // block's footer. To disable (i.e. always have "is_uniform=false"), set value
643
+ // to -1.
644
+ //
645
+ // On the read path, if `BlockSearchType::kAuto` is set, then it will use the
646
+ // is_uniform hint to select an appropriate search algorithm for the block.
647
+ //
648
+ // NOTE: Currently only supports index blocks. May update to include data
649
+ // blocks in the future.
650
+ double uniform_cv_threshold = -1;
597
651
 
598
652
  // Store index blocks on disk in compressed format. Changing this option to
599
653
  // false will avoid the overhead of decompression if index blocks are evicted
@@ -685,17 +739,34 @@ struct BlockBasedTableOptions {
685
739
 
686
740
  // If enabled, prepopulate warm/hot blocks (data, uncompressed dict, index and
687
741
  // filter blocks) which are already in memory into block cache at the time of
688
- // flush. On a flush, the block that is in memory (in memtables) get flushed
689
- // to the device. If using Direct IO, additional IO is incurred to read this
690
- // data back into memory again, which is avoided by enabling this option. This
742
+ // flush or compaction.
743
+ //
744
+ // On a flush, the data block that is in memory (in memtables) gets flushed to
745
+ // the device. If using Direct IO, additional IO is incurred to read this data
746
+ // back into memory again, which is avoided by enabling this option. This
691
747
  // further helps if the workload exhibits high temporal locality, where most
692
748
  // of the reads go to recently written data. This also helps in case of
693
749
  // Distributed FileSystem.
750
+ //
751
+ // On a compaction, output SST files are written to disk but not placed in the
752
+ // block cache by default. With tiered or remote storage (e.g., HDFS, S3),
753
+ // reading recently compacted data back incurs high latency.
754
+ // Enabling compaction warming avoids these cold reads. However, unlike flush
755
+ // output, it is hard to distinguish hot from cold blocks in compaction
756
+ // output, so warming all of it risks polluting the cache. To mitigate this,
757
+ // compaction-warmed blocks are inserted at BOTTOM priority (vs LOW for flush)
758
+ // so they are evicted first under cache pressure. Even so,
759
+ // kFlushAndCompaction is recommended only when most or all of the database is
760
+ // expected to reside in cache. For workloads where only a fraction of the
761
+ // data is hot, kFlushOnly is the safer choice.
694
762
  enum class PrepopulateBlockCache : char {
695
763
  // Disable prepopulate block cache.
696
764
  kDisable,
697
765
  // Prepopulate blocks during flush only.
698
766
  kFlushOnly,
767
+ // Prepopulate blocks during flush and compaction. Flush-warmed blocks are
768
+ // inserted at LOW priority, compaction-warmed blocks at BOTTOM priority.
769
+ kFlushAndCompaction,
699
770
  };
700
771
 
701
772
  PrepopulateBlockCache prepopulate_block_cache =
@@ -77,6 +77,9 @@ struct TablePropertiesNames {
77
77
  static const std::string kUserDefinedTimestampsPersisted;
78
78
  static const std::string kKeyLargestSeqno;
79
79
  static const std::string kKeySmallestSeqno;
80
+ static const std::string kDataBlockRestartInterval;
81
+ static const std::string kIndexBlockRestartInterval;
82
+ static const std::string kSeparateKeyValueInDataBlock;
80
83
  };
81
84
 
82
85
  // `TablePropertiesCollector` provides the mechanism for users to collect
@@ -320,6 +323,18 @@ struct TableProperties {
320
323
 
321
324
  bool HasKeySmallestSeqno() const { return key_smallest_seqno != UINT64_MAX; }
322
325
 
326
+ // Block restart intervals used when building this SST file.
327
+ // 0 means unknown (for backwards compatibility with older SST files).
328
+ uint64_t data_block_restart_interval = 0;
329
+ uint64_t index_block_restart_interval = 0;
330
+
331
+ // Whether the SST file uses separated key/value storage in data blocks (0 =
332
+ // false). The block footer stores the real source of truth of whether the
333
+ // block has separated key values, but this table property is useful for
334
+ // debugging/validation purposes. Consider removing this if we ever decide to
335
+ // mix separation strategies for a sst.
336
+ uint64_t separate_key_value_in_data_block = 0;
337
+
323
338
  // DB identity
324
339
  // db_id is an identifier generated the first time the DB is created
325
340
  // If DB identity is unset or unassigned, `db_id` will be an empty string.
@@ -30,18 +30,21 @@ class ToolHooks {
30
30
  ToolHooks() = default;
31
31
  virtual ~ToolHooks() = default;
32
32
  virtual Status Open(const Options& db_options, const std::string& name,
33
- DB** dbptr) = 0;
33
+ std::unique_ptr<DB>* dbptr) = 0;
34
34
  virtual Status Open(
35
35
  const DBOptions& db_options, const std::string& name,
36
36
  const std::vector<ColumnFamilyDescriptor>& column_families,
37
- std::vector<ColumnFamilyHandle*>* handles, DB** dbptr) = 0;
37
+ std::vector<ColumnFamilyHandle*>* handles,
38
+ std::unique_ptr<DB>* dbptr) = 0;
38
39
  virtual Status OpenForReadOnly(const Options& options,
39
- const std::string& name, DB** dbptr,
40
+ const std::string& name,
41
+ std::unique_ptr<DB>* dbptr,
40
42
  bool error_if_wal_file_exists) = 0;
41
43
  virtual Status OpenForReadOnly(
42
44
  const Options& options, const std::string& name,
43
45
  const std::vector<ColumnFamilyDescriptor>& column_families,
44
- std::vector<ColumnFamilyHandle*>* handles, DB** dbptr) = 0;
46
+ std::vector<ColumnFamilyHandle*>* handles,
47
+ std::unique_ptr<DB>* dbptr) = 0;
45
48
  virtual Status OpenTransactionDB(const Options& db_options,
46
49
  const TransactionDBOptions& txn_db_options,
47
50
  const std::string& dbname,
@@ -62,7 +65,7 @@ class ToolHooks {
62
65
  virtual Status OpenAsSecondary(const Options& options,
63
66
  const std::string& name,
64
67
  const std::string& secondary_path,
65
- DB** dbptr) = 0;
68
+ std::unique_ptr<DB>* dbptr) = 0;
66
69
  virtual Status OpenAsFollower(const Options& options, const std::string& name,
67
70
  const std::string& leader_path,
68
71
  std::unique_ptr<DB>* dbptr) = 0;
@@ -77,18 +80,21 @@ class DefaultHooks : public ToolHooks {
77
80
  DefaultHooks() = default;
78
81
  ~DefaultHooks() override = default;
79
82
  virtual Status Open(const Options& db_options, const std::string& name,
80
- DB** dbptr) override;
83
+ std::unique_ptr<DB>* dbptr) override;
81
84
  virtual Status Open(
82
85
  const DBOptions& db_options, const std::string& name,
83
86
  const std::vector<ColumnFamilyDescriptor>& column_families,
84
- std::vector<ColumnFamilyHandle*>* handles, DB** dbptr) override;
87
+ std::vector<ColumnFamilyHandle*>* handles,
88
+ std::unique_ptr<DB>* dbptr) override;
85
89
  virtual Status OpenForReadOnly(const Options& options,
86
- const std::string& name, DB** dbptr,
90
+ const std::string& name,
91
+ std::unique_ptr<DB>* dbptr,
87
92
  bool error_if_wal_file_exists) override;
88
93
  virtual Status OpenForReadOnly(
89
94
  const Options& options, const std::string& name,
90
95
  const std::vector<ColumnFamilyDescriptor>& column_families,
91
- std::vector<ColumnFamilyHandle*>* handles, DB** dbptr) override;
96
+ std::vector<ColumnFamilyHandle*>* handles,
97
+ std::unique_ptr<DB>* dbptr) override;
92
98
  virtual Status OpenTransactionDB(const Options& db_options,
93
99
  const TransactionDBOptions& txn_db_options,
94
100
  const std::string& dbname,
@@ -110,7 +116,7 @@ class DefaultHooks : public ToolHooks {
110
116
  virtual Status OpenAsSecondary(const Options& options,
111
117
  const std::string& name,
112
118
  const std::string& secondary_path,
113
- DB** dbptr) override;
119
+ std::unique_ptr<DB>* dbptr) override;
114
120
  virtual Status OpenAsFollower(const Options& options, const std::string& name,
115
121
  const std::string& leader_path,
116
122
  std::unique_ptr<DB>* dbptr) override;
@@ -33,8 +33,8 @@ namespace ROCKSDB_NAMESPACE {
33
33
  // And assuming one generates many SST files in the lifetime of each process,
34
34
  // the probability of ID collisions is much "better than random"; see
35
35
  // https://github.com/pdillinger/unique_id
36
- Status GetUniqueIdFromTableProperties(const TableProperties &props,
37
- std::string *out_id);
36
+ Status GetUniqueIdFromTableProperties(const TableProperties& props,
37
+ std::string* out_id);
38
38
 
39
39
  // Computes a 192-bit (24 binary char) stable, universally unique ID
40
40
  // with an extra 64 bits of uniqueness compared to the standard ID. It is only
@@ -44,12 +44,12 @@ Status GetUniqueIdFromTableProperties(const TableProperties &props,
44
44
  // example above would expect a global file ID collision every 4 days with
45
45
  // 128-bit IDs (using some worst-case assumptions about process lifetime).
46
46
  // It's 10^17 years with 192-bit IDs.
47
- Status GetExtendedUniqueIdFromTableProperties(const TableProperties &props,
48
- std::string *out_id);
47
+ Status GetExtendedUniqueIdFromTableProperties(const TableProperties& props,
48
+ std::string* out_id);
49
49
 
50
50
  // Converts a binary string (unique id) to hexadecimal, with each 64 bits
51
51
  // separated by '-', e.g. 6474DF650323BDF0-B48E64F3039308CA-17284B32E7F7444B
52
52
  // Also works on unique id prefix.
53
- std::string UniqueIdToHumanString(const std::string &id);
53
+ std::string UniqueIdToHumanString(const std::string& id);
54
54
 
55
55
  } // namespace ROCKSDB_NAMESPACE
@@ -111,8 +111,6 @@ class CompactionOptionsUniversal {
111
111
  // Default: false
112
112
  bool incremental;
113
113
 
114
- // EXPERIMENTAL
115
- //
116
114
  // If true, auto universal compaction picking will adjust to minimize locking
117
115
  // of input files when bottom priority compactions are waiting to run. This
118
116
  // can increase the likelihood of existing L0s being selected for compaction,
@@ -120,7 +118,7 @@ class CompactionOptionsUniversal {
120
118
  // the overall write amplification and compaction load on low priority
121
119
  // threads.
122
120
  //
123
- // Default: false (disabled)
121
+ // Default: true (enabled)
124
122
  //
125
123
  // This option does not apply to manual compactions.
126
124
  //
@@ -142,7 +140,7 @@ class CompactionOptionsUniversal {
142
140
  stop_style(kCompactionStopStyleTotalSize),
143
141
  allow_trivial_move(false),
144
142
  incremental(false),
145
- reduce_file_locking(false) {}
143
+ reduce_file_locking(true) {}
146
144
 
147
145
  bool operator==(const CompactionOptionsUniversal& rhs) const = default;
148
146
  };