@nxtedition/rocksdb 15.4.1 → 15.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (399) hide show
  1. package/binding.cc +24 -15
  2. package/deps/rocksdb/rocksdb/.clang-tidy +86 -0
  3. package/deps/rocksdb/rocksdb/BUCK +42 -0
  4. package/deps/rocksdb/rocksdb/CMakeLists.txt +11 -0
  5. package/deps/rocksdb/rocksdb/Makefile +59 -32
  6. package/deps/rocksdb/rocksdb/cache/cache.cc +0 -5
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +9 -9
  8. package/deps/rocksdb/rocksdb/cache/cache_key.cc +3 -3
  9. package/deps/rocksdb/rocksdb/cache/cache_key.h +5 -5
  10. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +16 -16
  11. package/deps/rocksdb/rocksdb/cache/cache_test.cc +1 -1
  12. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +258 -294
  13. package/deps/rocksdb/rocksdb/cache/clock_cache.h +98 -49
  14. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -5
  15. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +2 -3
  16. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +18 -18
  17. package/deps/rocksdb/rocksdb/crash_test.mk +5 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -22
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +6 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +14 -16
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +38 -26
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +101 -18
  24. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +12 -0
  25. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +6 -9
  26. package/deps/rocksdb/rocksdb/db/builder.cc +23 -0
  27. package/deps/rocksdb/rocksdb/db/builder.h +7 -0
  28. package/deps/rocksdb/rocksdb/db/c.cc +373 -57
  29. package/deps/rocksdb/rocksdb/db/c_test.c +101 -1
  30. package/deps/rocksdb/rocksdb/db/column_family.cc +31 -3
  31. package/deps/rocksdb/rocksdb/db/column_family_test.cc +10 -13
  32. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +35 -48
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +13 -5
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +201 -39
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -10
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +7 -7
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -455
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +19 -0
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +72 -9
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -10
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +405 -83
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +25 -1
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -10
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1410 -106
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -5
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +2 -1
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +19 -10
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +505 -45
  51. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
  52. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +9 -1
  53. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +4 -4
  54. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +7 -9
  55. package/deps/rocksdb/rocksdb/db/convenience.cc +4 -4
  56. package/deps/rocksdb/rocksdb/db/convenience_impl.h +2 -1
  57. package/deps/rocksdb/rocksdb/db/corruption_test.cc +60 -88
  58. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +10 -12
  59. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +471 -40
  60. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +116 -2
  61. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +5 -15
  62. package/deps/rocksdb/rocksdb/db/db_compaction_abort_test.cc +993 -0
  63. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +329 -29
  64. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +155 -13
  65. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +54 -31
  66. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -0
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +232 -70
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +57 -9
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +224 -31
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +4 -2
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +164 -8
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -0
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +5 -0
  77. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +47 -35
  78. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +22 -9
  79. package/deps/rocksdb/rocksdb/db/db_iter.cc +9 -0
  80. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +371 -6
  81. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -5
  82. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +22 -23
  83. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +0 -2
  84. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +4 -4
  85. package/deps/rocksdb/rocksdb/db/db_options_test.cc +40 -0
  86. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +32 -13
  87. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1 -1
  88. package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +4 -4
  89. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +68 -15
  90. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1 -1
  91. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +2 -3
  92. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -21
  93. package/deps/rocksdb/rocksdb/db/db_test.cc +644 -128
  94. package/deps/rocksdb/rocksdb/db/db_test2.cc +198 -81
  95. package/deps/rocksdb/rocksdb/db/db_test_util.cc +35 -10
  96. package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -2
  97. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +36 -32
  98. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +11 -7
  99. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +499 -0
  100. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +284 -20
  101. package/deps/rocksdb/rocksdb/db/db_write_test.cc +3 -3
  102. package/deps/rocksdb/rocksdb/db/dbformat.h +0 -5
  103. package/deps/rocksdb/rocksdb/db/error_handler.cc +24 -0
  104. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +12 -14
  105. package/deps/rocksdb/rocksdb/db/experimental.cc +13 -10
  106. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -1
  107. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +22 -3
  108. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +21 -15
  109. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +4 -6
  110. package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -3
  111. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +5 -6
  112. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +4 -2
  113. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +17 -17
  114. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -0
  115. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  116. package/deps/rocksdb/rocksdb/db/listener_test.cc +154 -27
  117. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -6
  118. package/deps/rocksdb/rocksdb/db/memtable.cc +197 -51
  119. package/deps/rocksdb/rocksdb/db/memtable.h +6 -0
  120. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +3 -4
  121. package/deps/rocksdb/rocksdb/db/merge_test.cc +37 -35
  122. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +2 -1
  123. package/deps/rocksdb/rocksdb/db/options_file_test.cc +4 -4
  124. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +9 -11
  125. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +10 -1
  126. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +292 -15
  127. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +10 -17
  128. package/deps/rocksdb/rocksdb/db/prefix_test.cc +6 -8
  129. package/deps/rocksdb/rocksdb/db/repair.cc +10 -10
  130. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -5
  131. package/deps/rocksdb/rocksdb/db/table_cache.cc +142 -135
  132. package/deps/rocksdb/rocksdb/db/table_cache.h +30 -6
  133. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -7
  134. package/deps/rocksdb/rocksdb/db/version_builder.cc +11 -50
  135. package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
  136. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +2 -1
  137. package/deps/rocksdb/rocksdb/db/version_edit.cc +51 -2
  138. package/deps/rocksdb/rocksdb/db/version_edit.h +91 -29
  139. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -7
  140. package/deps/rocksdb/rocksdb/db/version_set.cc +211 -50
  141. package/deps/rocksdb/rocksdb/db/version_set.h +40 -3
  142. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
  143. package/deps/rocksdb/rocksdb/db/version_set_test.cc +294 -21
  144. package/deps/rocksdb/rocksdb/db/version_util.cc +96 -0
  145. package/deps/rocksdb/rocksdb/db/version_util.h +24 -0
  146. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +5 -5
  147. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +647 -31
  148. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +219 -1
  149. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +549 -12
  150. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -3
  151. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +1 -1
  152. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -0
  153. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +21 -4
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +32 -0
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +74 -22
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +9 -0
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -61
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -2
  159. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +76 -2
  160. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +92 -72
  161. package/deps/rocksdb/rocksdb/env/env.cc +1 -0
  162. package/deps/rocksdb/rocksdb/env/env_test.cc +365 -2
  163. package/deps/rocksdb/rocksdb/env/fs_posix.cc +31 -30
  164. package/deps/rocksdb/rocksdb/env/io_posix.cc +8 -11
  165. package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
  166. package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
  167. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -1
  168. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +108 -0
  169. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +32 -4
  170. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +4 -4
  171. package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
  172. package/deps/rocksdb/rocksdb/file/file_util.h +2 -1
  173. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +331 -12
  174. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +52 -35
  175. package/deps/rocksdb/rocksdb/folly.mk +22 -5
  176. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +1 -1
  177. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +100 -54
  178. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +67 -2
  179. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +149 -13
  180. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -12
  181. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +78 -97
  182. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +3 -3
  183. package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +2 -2
  184. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +5 -0
  185. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +17 -2
  186. package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +1 -1
  187. package/deps/rocksdb/rocksdb/include/rocksdb/io_dispatcher.h +358 -0
  188. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +13 -0
  189. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +43 -0
  190. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +20 -0
  191. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +63 -21
  192. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +10 -1
  193. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +1 -1
  194. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +2 -7
  195. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +13 -0
  196. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -14
  197. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +49 -9
  198. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +8 -0
  199. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +77 -6
  200. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +15 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +16 -10
  202. package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +5 -5
  203. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +2 -4
  204. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +106 -46
  205. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +1 -1
  206. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +14 -1
  207. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +5 -1
  208. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +2 -1
  209. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -9
  210. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  211. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -2
  212. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +2 -2
  213. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +226 -8
  214. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +490 -0
  215. package/deps/rocksdb/rocksdb/memtable/skiplist.h +3 -3
  216. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +11 -0
  217. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +4 -12
  218. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +5 -5
  219. package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +21 -4
  220. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +9 -3
  221. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +21 -2
  222. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +2 -2
  223. package/deps/rocksdb/rocksdb/options/cf_options.cc +21 -1
  224. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  225. package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -2
  226. package/deps/rocksdb/rocksdb/options/db_options.cc +26 -5
  227. package/deps/rocksdb/rocksdb/options/db_options.h +3 -1
  228. package/deps/rocksdb/rocksdb/options/options.cc +5 -1
  229. package/deps/rocksdb/rocksdb/options/options_helper.cc +7 -2
  230. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +109 -103
  231. package/deps/rocksdb/rocksdb/options/options_test.cc +14 -0
  232. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +15 -17
  233. package/deps/rocksdb/rocksdb/port/lang.h +4 -0
  234. package/deps/rocksdb/rocksdb/port/port_example.h +0 -23
  235. package/deps/rocksdb/rocksdb/port/stack_trace.cc +36 -0
  236. package/deps/rocksdb/rocksdb/port/stack_trace.h +9 -0
  237. package/deps/rocksdb/rocksdb/src.mk +12 -0
  238. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +1 -2
  239. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  240. package/deps/rocksdb/rocksdb/table/block_based/block.cc +571 -292
  241. package/deps/rocksdb/rocksdb/table/block_based/block.h +143 -53
  242. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +154 -90
  243. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +5 -1
  244. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +51 -14
  245. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +0 -2
  246. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +147 -734
  247. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +30 -233
  248. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +178 -108
  249. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +13 -0
  250. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +17 -4
  251. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -2
  252. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +70 -0
  253. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +168 -24
  254. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -9
  255. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +7 -4
  256. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +9 -2
  257. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +548 -169
  258. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
  259. package/deps/rocksdb/rocksdb/table/block_based/block_util.h +156 -0
  260. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +73 -30
  261. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +74 -7
  262. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +1 -1
  263. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +20 -14
  264. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +22 -12
  265. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +1 -1
  266. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.cc +332 -0
  267. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.h +133 -0
  268. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -2
  269. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
  270. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +3 -2
  271. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +4 -1
  272. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +0 -1
  273. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +126 -46
  274. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +31 -3
  275. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +1 -2
  276. package/deps/rocksdb/rocksdb/table/cleanable_test.cc +3 -1
  277. package/deps/rocksdb/rocksdb/table/external_table.cc +25 -4
  278. package/deps/rocksdb/rocksdb/table/format.cc +27 -15
  279. package/deps/rocksdb/rocksdb/table/format.h +41 -15
  280. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -0
  281. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +22 -12
  282. package/deps/rocksdb/rocksdb/table/meta_blocks.h +0 -1
  283. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +7 -21
  284. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
  285. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +88 -13
  286. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +53 -42
  287. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +3 -12
  288. package/deps/rocksdb/rocksdb/table/table_builder.h +0 -4
  289. package/deps/rocksdb/rocksdb/table/table_properties.cc +18 -0
  290. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -3
  291. package/deps/rocksdb/rocksdb/table/table_test.cc +848 -172
  292. package/deps/rocksdb/rocksdb/table/unique_id.cc +24 -20
  293. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +8 -8
  294. package/deps/rocksdb/rocksdb/test_util/sync_point.h +5 -4
  295. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -1
  296. package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -2
  297. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +2 -1
  298. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +238 -120
  299. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +2 -2
  300. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +2 -4
  301. package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +4 -8
  302. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +1 -1
  303. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +2 -3
  304. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +82 -20
  305. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +41 -47
  306. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -0
  307. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +5 -6
  308. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +1 -1
  309. package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +6 -5
  310. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +4 -4
  311. package/deps/rocksdb/rocksdb/tools/write_stress.cc +1 -3
  312. package/deps/rocksdb/rocksdb/util/atomic.h +30 -23
  313. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +6 -7
  314. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +3 -3
  315. package/deps/rocksdb/rocksdb/util/bit_fields.h +68 -46
  316. package/deps/rocksdb/rocksdb/util/bloom_impl.h +16 -16
  317. package/deps/rocksdb/rocksdb/util/coding.h +14 -27
  318. package/deps/rocksdb/rocksdb/util/compression.cc +365 -207
  319. package/deps/rocksdb/rocksdb/util/compression.h +16 -1298
  320. package/deps/rocksdb/rocksdb/util/compression_test.cc +347 -61
  321. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +8 -9
  322. package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +1 -1
  323. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +1 -1
  324. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +3 -3
  325. package/deps/rocksdb/rocksdb/util/filter_bench.cc +18 -18
  326. package/deps/rocksdb/rocksdb/util/gflags_compat.h +3 -3
  327. package/deps/rocksdb/rocksdb/util/hash_test.cc +19 -7
  328. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.cc +1099 -0
  329. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.h +36 -0
  330. package/deps/rocksdb/rocksdb/util/io_dispatcher_test.cc +1919 -0
  331. package/deps/rocksdb/rocksdb/util/math.h +3 -1
  332. package/deps/rocksdb/rocksdb/util/mutexlock.h +19 -19
  333. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +25 -25
  334. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +5 -7
  335. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -5
  336. package/deps/rocksdb/rocksdb/util/slice.cc +0 -10
  337. package/deps/rocksdb/rocksdb/util/slice_test.cc +35 -1
  338. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +5 -7
  339. package/deps/rocksdb/rocksdb/util/status.cc +3 -1
  340. package/deps/rocksdb/rocksdb/util/stop_watch.h +2 -0
  341. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -1
  342. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +123 -78
  343. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +12 -93
  344. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -4
  345. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +0 -21
  346. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +6 -48
  347. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +94 -307
  348. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +12 -58
  349. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +2 -8
  350. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +2 -3
  351. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +205 -811
  352. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +18 -9
  353. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +2 -7
  354. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +1 -9
  355. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +17 -11
  356. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +1 -1
  357. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +1 -1
  358. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
  359. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +68 -61
  360. package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -1
  361. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +105 -59
  362. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -7
  363. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs_test.cc +94 -0
  364. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +13 -17
  365. package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +16 -3
  366. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +25 -25
  367. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +40 -40
  368. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +2 -5
  369. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +17 -19
  370. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +2 -2
  371. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +2 -2
  372. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +1 -1
  373. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +2 -2
  374. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +4 -13
  375. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +3 -3
  376. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
  377. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_seqno_test.cc +431 -0
  378. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -2
  379. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +91 -0
  380. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.cc +562 -0
  381. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.h +615 -0
  382. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.cc +2575 -0
  383. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.h +685 -0
  384. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_db_test.cc +2843 -0
  385. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.cc +567 -0
  386. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.h +275 -0
  387. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_test.cc +5183 -0
  388. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +4 -3
  389. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  390. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
  391. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +3 -3
  392. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +93 -88
  393. package/deps/rocksdb/rocksdb.gyp +7 -0
  394. package/iterator.js +2 -2
  395. package/package.json +1 -1
  396. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  397. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  398. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +0 -43
  399. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +0 -55
@@ -32,4 +32,34 @@ enum class BlockType : uint8_t {
32
32
  kInvalid
33
33
  };
34
34
 
35
// Returns a human-readable name for `block_type`.
//
// Each BlockType enumerator listed in the switch maps to a fixed string
// literal (the enumerator name without its leading 'k'). The returned pointer
// refers to a string literal, so callers must not free or modify it.
//
// The switch has no `default:` label; a value that matches none of the listed
// cases falls out of the switch and yields "Unknown".
+ inline const char* BlockTypeToString(BlockType block_type) {
36
+ switch (block_type) {
37
+ case BlockType::kData:
38
+ return "Data";
39
+ case BlockType::kFilter:
40
+ return "Filter";
41
+ case BlockType::kFilterPartitionIndex:
42
+ return "FilterPartitionIndex";
43
+ case BlockType::kProperties:
44
+ return "Properties";
45
+ case BlockType::kCompressionDictionary:
46
+ return "CompressionDictionary";
47
+ case BlockType::kRangeDeletion:
48
+ return "RangeDeletion";
49
+ case BlockType::kHashIndexPrefixes:
50
+ return "HashIndexPrefixes";
51
+ case BlockType::kHashIndexMetadata:
52
+ return "HashIndexMetadata";
53
+ case BlockType::kMetaIndex:
54
+ return "MetaIndex";
55
+ case BlockType::kIndex:
56
+ return "Index";
57
+ case BlockType::kUserDefinedIndex:
58
+ return "UserDefinedIndex";
59
+ case BlockType::kInvalid:
60
+ return "Invalid";
61
+ }
62
+ return "Unknown";  // reached only when no case above matched block_type
63
+ }
64
+
35
65
  } // namespace ROCKSDB_NAMESPACE
@@ -0,0 +1,156 @@
1
+ // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+
6
+ #pragma once
7
+
8
+ #include <algorithm>
9
+ #include <cassert>
10
+ #include <cstdint>
11
+ #include <cstring>
12
+
13
+ #include "db/dbformat.h"
14
+ #include "port/port.h"
15
+ #include "rocksdb/slice.h"
16
+ #include "util/coding.h"
17
+ #include "util/math.h"
18
+
19
+ namespace ROCKSDB_NAMESPACE {
20
+
21
+ // Decode the next block entry starting at "p", storing the number of shared key
22
+ // bytes, non_shared key bytes, and the length of the value in "*shared",
23
+ // "*non_shared", and "*value_length", respectively. Will not dereference past
24
+ // "limit".
25
+ //
26
+ // If any errors are detected, returns nullptr. Otherwise, returns a
27
+ // pointer to the key delta (just past the three decoded values).
28
+ struct DecodeEntry {
29
+ inline const char* operator()(const char* p, const char* limit,
30
+ uint32_t* shared, uint32_t* non_shared,
31
+ uint32_t* value_length,
32
+ uint32_t* value_offset) {
33
+ // We need 2 bytes for shared and non_shared size. We also need one more
34
+ // byte either for value size or the actual value in case of value delta
35
+ // encoding.
36
+ assert(limit - p >= 3);
37
+ *shared = reinterpret_cast<const unsigned char*>(p)[0];
38
+ *non_shared = reinterpret_cast<const unsigned char*>(p)[1];
39
+ *value_length = reinterpret_cast<const unsigned char*>(p)[2];
40
+ if ((*shared | *non_shared | *value_length) < 128) {
41
+ // Fast path: all three values are encoded in one byte each
42
+ p += 3;
43
+ } else {
44
+ if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) {
45
+ return nullptr;
46
+ }
47
+ if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) {
48
+ return nullptr;
49
+ }
50
+ if ((p = GetVarint32Ptr(p, limit, value_length)) == nullptr) {
51
+ return nullptr;
52
+ }
53
+ }
54
+
55
+ if (value_offset) {
56
+ if ((p = GetVarint32Ptr(p, limit, value_offset)) == nullptr) {
57
+ return nullptr;
58
+ }
59
+ }
60
+
61
+ return p;
62
+ }
63
+ };
64
+
65
+ struct DecodeKey {
66
+ inline const char* operator()(const char* p, const char* limit,
67
+ uint32_t* shared, uint32_t* non_shared,
68
+ uint32_t* value_offset) {
69
+ uint32_t value_length;
70
+ return DecodeEntry()(p, limit, shared, non_shared, &value_length,
71
+ value_offset);
72
+ }
73
+ };
74
+
75
+ // In format_version 4, which is used by index blocks, the value size is not
76
+ // encoded before the entry, as the value is known to be the handle with the
77
+ // known size.
78
+ struct DecodeKeyV4 {
79
+ inline const char* operator()(const char* p, const char* limit,
80
+ uint32_t* shared, uint32_t* non_shared,
81
+ uint32_t* value_offset) {
82
+ // We need 2 bytes for shared and non_shared size. We also need one more
83
+ // byte either for value size or the actual value in case of value delta
84
+ // encoding.
85
+ if (limit - p < 3) {
86
+ return nullptr;
87
+ }
88
+ *shared = reinterpret_cast<const unsigned char*>(p)[0];
89
+ *non_shared = reinterpret_cast<const unsigned char*>(p)[1];
90
+ if ((*shared | *non_shared) < 128) {
91
+ // Fast path: both values are encoded in one byte each
92
+ p += 2;
93
+ } else {
94
+ if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) {
95
+ return nullptr;
96
+ }
97
+ if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) {
98
+ return nullptr;
99
+ }
100
+ }
101
+
102
+ if (value_offset) {
103
+ if ((p = GetVarint32Ptr(p, limit, value_offset)) == nullptr) {
104
+ return nullptr;
105
+ }
106
+ }
107
+ return p;
108
+ }
109
+ };
110
+
111
+ struct DecodeEntryV4 {
112
+ inline const char* operator()(const char* p, const char* limit,
113
+ uint32_t* shared, uint32_t* non_shared,
114
+ uint32_t* value_length,
115
+ uint32_t* value_offset) {
116
+ assert(value_length);
117
+
118
+ *value_length = 0;
119
+ return DecodeKeyV4()(p, limit, shared, non_shared, value_offset);
120
+ }
121
+ };
122
+
123
+ // Read first 8 bytes (starting at offset) as big-endian uint64_t, padding
124
+ // with zeros on the right if the key is shorter. This preserves
125
+ // lexicographic ordering.
126
+ //
127
+ // If s.size() <= offset, then returns 0.
128
+ inline uint64_t ReadBe64FromKey(Slice s, bool is_user_key, size_t offset) {
129
+ if (!is_user_key) {
130
+ assert(s.size() >= kNumInternalBytes);
131
+ s = Slice(s.data(), s.size() - kNumInternalBytes);
132
+ }
133
+ offset = std::min(offset, s.size());
134
+ size_t remaining = s.size() - offset;
135
+
136
+ // fast path
137
+ if (remaining >= 8) {
138
+ uint64_t val;
139
+ memcpy(&val, s.data() + offset, sizeof(val));
140
+ if (port::kLittleEndian) {
141
+ return EndianSwapValue(val);
142
+ }
143
+ return val;
144
+ }
145
+
146
+ uint64_t val = 0;
147
+ for (size_t i = 0; i < remaining; i++) {
148
+ val = (val << 8) | static_cast<uint8_t>(s.data()[offset + i]);
149
+ }
150
+ if (remaining > 0) {
151
+ val <<= (8 - remaining) * 8; // Pad zeros on the right
152
+ }
153
+ return val;
154
+ }
155
+
156
+ } // namespace ROCKSDB_NAMESPACE
@@ -9,51 +9,94 @@
9
9
 
10
10
  #include "table/block_based/data_block_footer.h"
11
11
 
12
- #include "rocksdb/table.h"
12
+ #include "util/coding.h"
13
13
 
14
14
  namespace ROCKSDB_NAMESPACE {
15
15
 
16
- const int kDataBlockIndexTypeBitShift = 31;
16
+ // Hash index bit (bit 31)
17
+ constexpr uint32_t kHashIndexBit = 1u << 31;
18
+ // Uniform keys bit (bit 29) - indicates keys are uniformly distributed
19
+ constexpr uint32_t kUniformKeysBit = 1u << 29;
20
+ // Separated KV storage bit (bit 28)
21
+ constexpr uint32_t kSeparatedKVBit = 1u << 28;
17
22
 
18
- // 0x7FFFFFFF
19
- const uint32_t kMaxNumRestarts = (1u << kDataBlockIndexTypeBitShift) - 1u;
23
+ void DataBlockFooter::EncodeTo(std::string* dst) const {
24
+ assert(num_restarts <= kMaxNumRestarts);
20
25
 
21
- // 0x7FFFFFFF
22
- const uint32_t kNumRestartsMask = (1u << kDataBlockIndexTypeBitShift) - 1u;
23
-
24
- uint32_t PackIndexTypeAndNumRestarts(
25
- BlockBasedTableOptions::DataBlockIndexType index_type,
26
- uint32_t num_restarts) {
27
- if (num_restarts > kMaxNumRestarts) {
28
- assert(0); // mute travis "unused" warning
26
+ // If separated KV, write the values_section_offset before the packed footer
27
+ if (separated_kv) {
28
+ PutFixed32(dst, values_section_offset);
29
29
  }
30
30
 
31
- uint32_t block_footer = num_restarts;
31
+ uint32_t packed = num_restarts;
32
32
  if (index_type == BlockBasedTableOptions::kDataBlockBinaryAndHash) {
33
- block_footer |= 1u << kDataBlockIndexTypeBitShift;
34
- } else if (index_type != BlockBasedTableOptions::kDataBlockBinarySearch) {
35
- assert(0);
33
+ packed |= kHashIndexBit;
34
+ } else {
35
+ assert(index_type == BlockBasedTableOptions::kDataBlockBinarySearch);
36
+ }
37
+ if (separated_kv) {
38
+ packed |= kSeparatedKVBit;
39
+ }
40
+ if (is_uniform) {
41
+ packed |= kUniformKeysBit;
36
42
  }
37
43
 
38
- return block_footer;
44
+ PutFixed32(dst, packed);
39
45
  }
40
46
 
41
- void UnPackIndexTypeAndNumRestarts(
42
- uint32_t block_footer,
43
- BlockBasedTableOptions::DataBlockIndexType* index_type,
44
- uint32_t* num_restarts) {
45
- if (index_type) {
46
- if (block_footer & 1u << kDataBlockIndexTypeBitShift) {
47
- *index_type = BlockBasedTableOptions::kDataBlockBinaryAndHash;
48
- } else {
49
- *index_type = BlockBasedTableOptions::kDataBlockBinarySearch;
50
- }
47
+ Status DataBlockFooter::DecodeFrom(Slice* input) {
48
+ if (input->size() < kMinEncodedLength) {
49
+ return Status::Corruption("Block too small for footer");
50
+ }
51
+
52
+ // Decode from the end of the input
53
+ const char* footer_ptr = input->data() + input->size() - sizeof(uint32_t);
54
+ uint32_t packed = DecodeFixed32(footer_ptr);
55
+
56
+ if (packed & kHashIndexBit) {
57
+ index_type = BlockBasedTableOptions::kDataBlockBinaryAndHash;
58
+ packed &= ~kHashIndexBit;
59
+ } else {
60
+ index_type = BlockBasedTableOptions::kDataBlockBinarySearch;
61
+ }
62
+
63
+ if (packed & kSeparatedKVBit) {
64
+ separated_kv = true;
65
+ packed &= ~kSeparatedKVBit;
66
+ } else {
67
+ separated_kv = false;
51
68
  }
52
69
 
53
- if (num_restarts) {
54
- *num_restarts = block_footer & kNumRestartsMask;
55
- assert(*num_restarts <= kMaxNumRestarts);
70
+ if (packed & kUniformKeysBit) {
71
+ is_uniform = true;
72
+ packed &= ~kUniformKeysBit;
73
+ } else {
74
+ is_uniform = false;
56
75
  }
76
+
77
+ // Check for reserved/unrecognized feature bits (anything beyond
78
+ // kMaxNumRestarts)
79
+ if (packed > kMaxNumRestarts) {
80
+ return Status::Corruption(
81
+ "Unrecognized feature in block footer (reserved bits set)");
82
+ }
83
+
84
+ num_restarts = packed;
85
+
86
+ input->remove_suffix(sizeof(uint32_t));
87
+
88
+ // If separated KV, read values_section_offset from before the packed footer
89
+ if (separated_kv) {
90
+ if (input->size() < sizeof(uint32_t)) {
91
+ return Status::Corruption(
92
+ "Block too small for separated KV values section offset");
93
+ }
94
+ values_section_offset =
95
+ DecodeFixed32(input->data() + input->size() - sizeof(uint32_t));
96
+ input->remove_suffix(sizeof(uint32_t));
97
+ }
98
+
99
+ return Status::OK();
57
100
  }
58
101
 
59
102
  } // namespace ROCKSDB_NAMESPACE
@@ -9,17 +9,84 @@
9
9
 
10
10
  #pragma once
11
11
 
12
+ #include <cstdint>
13
+ #include <string>
14
+
15
+ #include "rocksdb/slice.h"
16
+ #include "rocksdb/status.h"
12
17
  #include "rocksdb/table.h"
13
18
 
14
19
  namespace ROCKSDB_NAMESPACE {
15
20
 
16
- uint32_t PackIndexTypeAndNumRestarts(
17
- BlockBasedTableOptions::DataBlockIndexType index_type,
18
- uint32_t num_restarts);
21
+ // DataBlockFooter represents the footer of a data block, containing metadata
22
+ // about the block's structure and features.
23
+ //
24
+ // Current encoding (may expand in future format versions):
25
+ // - A single uint32_t where:
26
+ // - The low 28 bits store the number of restart points (num_restarts)
27
+ // - The high 4 bits are reserved for metadata/features:
28
+ // - Bit 31: Hash index present (kDataBlockBinaryAndHash)
29
+ // - Bit 30: IMPORTANT: Cannot be used without format version bump.
30
+ // - Bit 29: Uniform keys flag (for kAuto index block search)
31
+ // - Bit 28: Separated KV storage (keys and values stored in separate
32
+ // sections within the block)
33
+ //
34
+ // Note on forward compatibility: Bits 28-29 can be read by older versions of
35
+ // RocksDB and interpreted as an extremely large num_restarts, which will be caught as a
36
+ // corruption. Bit 30 is special because num_restarts is multiplied by 4, causing
37
+ // overflow and the corruption check to be silently ignored
38
+ // (https://github.com/facebook/rocksdb/blob/10.11.fb/table/block_based/block.cc#L1070-L1103)
39
+ //
40
+ // When separated KV is enabled, an additional uint32_t is prepended before the
41
+ // packed footer, storing the offset to the values section within the block.
42
+ //
43
+ // When any unrecognized reserved bit is set, DecodeFrom() returns an error,
44
+ // allowing older versions to fail gracefully on newer formats.
45
+ //
46
+ // The encoding size is not fixed - future format versions may expand it.
47
+ // Use kMaxEncodedLength for buffer sizing.
48
+ struct DataBlockFooter {
49
+ // Maximum number of restarts that can be stored (2^28 - 1 = 268,435,455).
50
+ // This reserves the top 4 bits for metadata (bit 31 for hash index, bit 30
51
+ // reserved, bit 29 for uniform keys, bit 28 for separated KV). For historical compatibility purposes, the
52
+ // limit is adequate because a 4GiB block (maximum due to 32-bit block size)
53
+ // with restart_interval=1 and minimum entries (12 bytes: 3 varint bytes +
54
+ // 9-byte internal key + empty value) plus 4-byte restart offsets = 16 bytes
55
+ // per restart, fits at most (2^32 - 4) / 16 ≈ 268 million restarts.
56
+ static constexpr uint32_t kMaxNumRestarts = (1u << 28) - 1;
57
+
58
+ // Maximum encoded length of a DataBlockFooter (for buffer sizing).
59
+ // 8 bytes when separated KV is enabled (values_section_offset + packed),
60
+ // 4 bytes otherwise.
61
+ static constexpr uint32_t kMaxEncodedLength = 2 * sizeof(uint32_t);
62
+
63
+ // Minimum encoded length (for current format version)
64
+ static constexpr uint32_t kMinEncodedLength = sizeof(uint32_t);
65
+
66
+ BlockBasedTableOptions::DataBlockIndexType index_type =
67
+ BlockBasedTableOptions::kDataBlockBinarySearch;
68
+
69
+ // Whether the block uses separated KV storage (keys and values in separate
70
+ // sections). When true, values_section_offset indicates where the values
71
+ // section begins within the block data.
72
+ bool separated_kv = false;
73
+ uint32_t values_section_offset = 0;
74
+
75
+ uint32_t num_restarts = 0;
76
+ bool is_uniform = false;
77
+
78
+ DataBlockFooter() = default;
79
+ DataBlockFooter(BlockBasedTableOptions::DataBlockIndexType _index_type,
80
+ uint32_t _num_restarts)
81
+ : index_type(_index_type), num_restarts(_num_restarts) {}
82
+
83
+ // Appends the encoded footer to dst.
84
+ void EncodeTo(std::string* dst) const;
19
85
 
20
- void UnPackIndexTypeAndNumRestarts(
21
- uint32_t block_footer,
22
- BlockBasedTableOptions::DataBlockIndexType* index_type,
23
- uint32_t* num_restarts);
86
+ // Decodes a footer from the end of input (consumes bytes from the end).
87
+ // Returns an error if reserved/unrecognized feature bits are set.
88
+ // On success, advances input to exclude the consumed footer bytes.
89
+ Status DecodeFrom(Slice* input);
90
+ };
24
91
 
25
92
  } // namespace ROCKSDB_NAMESPACE
@@ -122,7 +122,7 @@ class DataBlockHashIndex {
122
122
 
123
123
  uint8_t Lookup(const char* data, uint32_t map_offset, const Slice& key) const;
124
124
 
125
- inline bool Valid() { return num_buckets_ != 0; }
125
+ inline bool Valid() const { return num_buckets_ != 0; }
126
126
 
127
127
  private:
128
128
  // To make the serialized hash index compact and to save the space overhead,
@@ -29,7 +29,7 @@ IndexBuilder* IndexBuilder::CreateIndexBuilder(
29
29
  const InternalKeySliceTransform* int_key_slice_transform,
30
30
  const bool use_value_delta_encoding,
31
31
  const BlockBasedTableOptions& table_opt, size_t ts_sz,
32
- const bool persist_user_defined_timestamps) {
32
+ const bool persist_user_defined_timestamps, Statistics* statistics) {
33
33
  IndexBuilder* result = nullptr;
34
34
  switch (index_type) {
35
35
  case BlockBasedTableOptions::kBinarySearch: {
@@ -37,7 +37,8 @@ IndexBuilder* IndexBuilder::CreateIndexBuilder(
37
37
  comparator, table_opt.index_block_restart_interval,
38
38
  table_opt.format_version, use_value_delta_encoding,
39
39
  table_opt.index_shortening, /* include_first_key */ false, ts_sz,
40
- persist_user_defined_timestamps);
40
+ persist_user_defined_timestamps, statistics,
41
+ table_opt.uniform_cv_threshold);
41
42
  break;
42
43
  }
43
44
  case BlockBasedTableOptions::kHashSearch: {
@@ -48,7 +49,7 @@ IndexBuilder* IndexBuilder::CreateIndexBuilder(
48
49
  comparator, int_key_slice_transform,
49
50
  table_opt.index_block_restart_interval, table_opt.format_version,
50
51
  use_value_delta_encoding, table_opt.index_shortening, ts_sz,
51
- persist_user_defined_timestamps);
52
+ persist_user_defined_timestamps, table_opt.uniform_cv_threshold);
52
53
  break;
53
54
  }
54
55
  case BlockBasedTableOptions::kTwoLevelIndexSearch: {
@@ -62,11 +63,12 @@ IndexBuilder* IndexBuilder::CreateIndexBuilder(
62
63
  comparator, table_opt.index_block_restart_interval,
63
64
  table_opt.format_version, use_value_delta_encoding,
64
65
  table_opt.index_shortening, /* include_first_key */ true, ts_sz,
65
- persist_user_defined_timestamps);
66
+ persist_user_defined_timestamps, statistics,
67
+ table_opt.uniform_cv_threshold);
66
68
  break;
67
69
  }
68
70
  default: {
69
- assert(!"Do not recognize the index type ");
71
+ assert(false && "Do not recognize the index type ");
70
72
  break;
71
73
  }
72
74
  }
@@ -135,30 +137,32 @@ PartitionedIndexBuilder* PartitionedIndexBuilder::CreateIndexBuilder(
135
137
  const InternalKeyComparator* comparator,
136
138
  const bool use_value_delta_encoding,
137
139
  const BlockBasedTableOptions& table_opt, size_t ts_sz,
138
- const bool persist_user_defined_timestamps) {
139
- return new PartitionedIndexBuilder(comparator, table_opt,
140
- use_value_delta_encoding, ts_sz,
141
- persist_user_defined_timestamps);
140
+ const bool persist_user_defined_timestamps, Statistics* statistics) {
141
+ return new PartitionedIndexBuilder(
142
+ comparator, table_opt, use_value_delta_encoding, ts_sz,
143
+ persist_user_defined_timestamps, statistics);
142
144
  }
143
145
 
144
146
  PartitionedIndexBuilder::PartitionedIndexBuilder(
145
147
  const InternalKeyComparator* comparator,
146
148
  const BlockBasedTableOptions& table_opt,
147
149
  const bool use_value_delta_encoding, size_t ts_sz,
148
- const bool persist_user_defined_timestamps)
150
+ const bool persist_user_defined_timestamps, Statistics* statistics)
149
151
  : IndexBuilder(comparator, ts_sz, persist_user_defined_timestamps),
150
152
  index_block_builder_(
151
153
  table_opt.index_block_restart_interval, true /*use_delta_encoding*/,
152
154
  use_value_delta_encoding,
153
155
  BlockBasedTableOptions::kDataBlockBinarySearch /* index_type */,
154
156
  0.75 /* data_block_hash_table_util_ratio */, ts_sz,
155
- persist_user_defined_timestamps, false /* is_user_key */),
157
+ persist_user_defined_timestamps, false /* is_user_key */,
158
+ /*use_separated_kv_storage=*/false),
156
159
  index_block_builder_without_seq_(
157
160
  table_opt.index_block_restart_interval, true /*use_delta_encoding*/,
158
161
  use_value_delta_encoding,
159
162
  BlockBasedTableOptions::kDataBlockBinarySearch /* index_type */,
160
163
  0.75 /* data_block_hash_table_util_ratio */, ts_sz,
161
- persist_user_defined_timestamps, true /* is_user_key */),
164
+ persist_user_defined_timestamps, true /* is_user_key */,
165
+ /*use_separated_kv_storage=*/false),
162
166
  table_opt_(table_opt),
163
167
  // We start by false. After each partition we revise the value based on
164
168
  // what the sub_index_builder has decided. If the feature is disabled
@@ -167,7 +171,8 @@ PartitionedIndexBuilder::PartitionedIndexBuilder(
167
171
  // sub_index_builders could not safely exclude seq from the keys, then it
168
172
  // will be enforced on all sub_index_builders on ::Finish.
169
173
  must_use_separator_with_seq_(false),
170
- use_value_delta_encoding_(use_value_delta_encoding) {
174
+ use_value_delta_encoding_(use_value_delta_encoding),
175
+ statistics_(statistics) {
171
176
  MakeNewSubIndexBuilder();
172
177
  }
173
178
 
@@ -176,7 +181,8 @@ void PartitionedIndexBuilder::MakeNewSubIndexBuilder() {
176
181
  comparator_, table_opt_.index_block_restart_interval,
177
182
  table_opt_.format_version, use_value_delta_encoding_,
178
183
  table_opt_.index_shortening, /* include_first_key */ false, ts_sz_,
179
- persist_user_defined_timestamps_);
184
+ persist_user_defined_timestamps_, statistics_,
185
+ table_opt_.uniform_cv_threshold);
180
186
  sub_index_builder_ = new_builder.get();
181
187
  // Start next partition entry, where we will modify the key
182
188
  entries_.push_back({{}, std::move(new_builder)});
@@ -20,6 +20,7 @@
20
20
  #include "table/block_based/block_builder.h"
21
21
  #include "table/block_based/flush_block_policy_impl.h"
22
22
  #include "table/format.h"
23
+ #include "util/atomic.h"
23
24
 
24
25
  namespace ROCKSDB_NAMESPACE {
25
26
  // The interface for building index.
@@ -39,7 +40,8 @@ class IndexBuilder {
39
40
  const InternalKeyComparator* comparator,
40
41
  const InternalKeySliceTransform* int_key_slice_transform,
41
42
  bool use_value_delta_encoding, const BlockBasedTableOptions& table_opt,
42
- size_t ts_sz, bool persist_user_defined_timestamps);
43
+ size_t ts_sz, bool persist_user_defined_timestamps,
44
+ Statistics* statistics = nullptr);
43
45
 
44
46
  // Index builder will construct a set of blocks which contain:
45
47
  // 1. One primary index block.
@@ -226,20 +228,25 @@ class ShortenedIndexBuilder : public IndexBuilder {
226
228
  const bool use_value_delta_encoding,
227
229
  BlockBasedTableOptions::IndexShorteningMode shortening_mode,
228
230
  bool include_first_key, size_t ts_sz,
229
- const bool persist_user_defined_timestamps)
231
+ const bool persist_user_defined_timestamps, Statistics* statistics,
232
+ double uniform_cv_threshold)
230
233
  : IndexBuilder(comparator, ts_sz, persist_user_defined_timestamps),
231
234
  index_block_builder_(
232
235
  index_block_restart_interval, true /*use_delta_encoding*/,
233
236
  use_value_delta_encoding,
234
237
  BlockBasedTableOptions::kDataBlockBinarySearch /* index_type */,
235
238
  0.75 /* data_block_hash_table_util_ratio */, ts_sz,
236
- persist_user_defined_timestamps, false /* is_user_key */),
239
+ persist_user_defined_timestamps, false /* is_user_key */,
240
+ false /* use_separated_kv_storage */, statistics,
241
+ uniform_cv_threshold),
237
242
  index_block_builder_without_seq_(
238
243
  index_block_restart_interval, true /*use_delta_encoding*/,
239
244
  use_value_delta_encoding,
240
245
  BlockBasedTableOptions::kDataBlockBinarySearch /* index_type */,
241
246
  0.75 /* data_block_hash_table_util_ratio */, ts_sz,
242
- persist_user_defined_timestamps, true /* is_user_key */),
247
+ persist_user_defined_timestamps, true /* is_user_key */,
248
+ false /* use_separated_kv_storage */, statistics,
249
+ uniform_cv_threshold),
243
250
  use_value_delta_encoding_(use_value_delta_encoding),
244
251
  include_first_key_(include_first_key),
245
252
  shortening_mode_(shortening_mode) {
@@ -513,12 +520,14 @@ class HashIndexBuilder : public IndexBuilder {
513
520
  int index_block_restart_interval, int format_version,
514
521
  bool use_value_delta_encoding,
515
522
  BlockBasedTableOptions::IndexShorteningMode shortening_mode,
516
- size_t ts_sz, const bool persist_user_defined_timestamps)
523
+ size_t ts_sz, const bool persist_user_defined_timestamps,
524
+ double uniform_cv_threshold)
517
525
  : IndexBuilder(comparator, ts_sz, persist_user_defined_timestamps),
518
526
  primary_index_builder_(comparator, index_block_restart_interval,
519
527
  format_version, use_value_delta_encoding,
520
528
  shortening_mode, /* include_first_key */ false,
521
- ts_sz, persist_user_defined_timestamps),
529
+ ts_sz, persist_user_defined_timestamps,
530
+ nullptr /* statistics */, uniform_cv_threshold),
522
531
  hash_key_extractor_(hash_key_extractor) {}
523
532
 
524
533
  Slice AddIndexEntry(const Slice& last_key_in_current_block,
@@ -608,10 +617,9 @@ class HashIndexBuilder : public IndexBuilder {
608
617
  void FlushPendingPrefix() {
609
618
  prefix_block_.append(pending_entry_prefix_.data(),
610
619
  pending_entry_prefix_.size());
611
- PutVarint32Varint32Varint32(
612
- &prefix_meta_block_,
613
- static_cast<uint32_t>(pending_entry_prefix_.size()),
614
- pending_entry_index_, pending_block_num_);
620
+ PutVarint32(&prefix_meta_block_,
621
+ static_cast<uint32_t>(pending_entry_prefix_.size()),
622
+ pending_entry_index_, pending_block_num_);
615
623
  }
616
624
 
617
625
  ShortenedIndexBuilder primary_index_builder_;
@@ -647,12 +655,13 @@ class PartitionedIndexBuilder : public IndexBuilder {
647
655
  static PartitionedIndexBuilder* CreateIndexBuilder(
648
656
  const InternalKeyComparator* comparator, bool use_value_delta_encoding,
649
657
  const BlockBasedTableOptions& table_opt, size_t ts_sz,
650
- bool persist_user_defined_timestamps);
658
+ bool persist_user_defined_timestamps, Statistics* statistics = nullptr);
651
659
 
652
660
  PartitionedIndexBuilder(const InternalKeyComparator* comparator,
653
661
  const BlockBasedTableOptions& table_opt,
654
662
  bool use_value_delta_encoding, size_t ts_sz,
655
- bool persist_user_defined_timestamps);
663
+ bool persist_user_defined_timestamps,
664
+ Statistics* statistics = nullptr);
656
665
 
657
666
  Slice AddIndexEntry(const Slice& last_key_in_current_block,
658
667
  const Slice* first_key_in_next_block,
@@ -745,6 +754,7 @@ class PartitionedIndexBuilder : public IndexBuilder {
745
754
  // true if it should cut the next filter partition block
746
755
  bool cut_filter_block = false;
747
756
  BlockHandle last_encoded_handle_;
757
+ Statistics* statistics_;
748
758
  // Cached estimate of current index size, updated when data blocks are added
749
759
  RelaxedAtomic<uint64_t> estimated_index_size_{0};
750
760
  // Running estimate of completed partitions total size
@@ -32,7 +32,7 @@ class MockBlockBasedTableTester {
32
32
 
33
33
  explicit MockBlockBasedTableTester(const FilterPolicy* filter_policy)
34
34
  : MockBlockBasedTableTester(
35
- std::shared_ptr<const FilterPolicy>(filter_policy)){};
35
+ std::shared_ptr<const FilterPolicy>(filter_policy)) {};
36
36
 
37
37
  explicit MockBlockBasedTableTester(
38
38
  std::shared_ptr<const FilterPolicy> filter_policy)