@nxtedition/rocksdb 15.4.1 → 16.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (401)
  1. package/binding.cc +70 -23
  2. package/deps/rocksdb/rocksdb/.clang-tidy +86 -0
  3. package/deps/rocksdb/rocksdb/BUCK +42 -0
  4. package/deps/rocksdb/rocksdb/CMakeLists.txt +11 -0
  5. package/deps/rocksdb/rocksdb/Makefile +59 -32
  6. package/deps/rocksdb/rocksdb/cache/cache.cc +0 -5
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +9 -9
  8. package/deps/rocksdb/rocksdb/cache/cache_key.cc +3 -3
  9. package/deps/rocksdb/rocksdb/cache/cache_key.h +5 -5
  10. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +16 -16
  11. package/deps/rocksdb/rocksdb/cache/cache_test.cc +1 -1
  12. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +258 -294
  13. package/deps/rocksdb/rocksdb/cache/clock_cache.h +98 -49
  14. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -5
  15. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +2 -3
  16. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +18 -18
  17. package/deps/rocksdb/rocksdb/crash_test.mk +5 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -22
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +6 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +14 -16
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +38 -26
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +101 -18
  24. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +12 -0
  25. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +6 -9
  26. package/deps/rocksdb/rocksdb/db/builder.cc +23 -0
  27. package/deps/rocksdb/rocksdb/db/builder.h +7 -0
  28. package/deps/rocksdb/rocksdb/db/c.cc +373 -57
  29. package/deps/rocksdb/rocksdb/db/c_test.c +101 -1
  30. package/deps/rocksdb/rocksdb/db/column_family.cc +31 -3
  31. package/deps/rocksdb/rocksdb/db/column_family_test.cc +10 -13
  32. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +35 -48
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +13 -5
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +201 -39
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -10
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +7 -7
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -455
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +19 -0
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +72 -9
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -10
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +405 -83
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +25 -1
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -10
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1410 -106
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -5
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +2 -1
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +19 -10
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +505 -45
  51. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
  52. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +9 -1
  53. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +4 -4
  54. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +7 -9
  55. package/deps/rocksdb/rocksdb/db/convenience.cc +4 -4
  56. package/deps/rocksdb/rocksdb/db/convenience_impl.h +2 -1
  57. package/deps/rocksdb/rocksdb/db/corruption_test.cc +60 -88
  58. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +10 -12
  59. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +471 -40
  60. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +116 -2
  61. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +5 -15
  62. package/deps/rocksdb/rocksdb/db/db_compaction_abort_test.cc +993 -0
  63. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +329 -29
  64. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +155 -13
  65. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +54 -31
  66. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -0
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +232 -70
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +57 -9
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +224 -31
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +4 -2
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +164 -8
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -0
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +5 -0
  77. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +47 -35
  78. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +22 -9
  79. package/deps/rocksdb/rocksdb/db/db_iter.cc +9 -0
  80. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +371 -6
  81. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -5
  82. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +22 -23
  83. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +0 -2
  84. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +4 -4
  85. package/deps/rocksdb/rocksdb/db/db_options_test.cc +40 -0
  86. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +32 -13
  87. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1 -1
  88. package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +4 -4
  89. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +68 -15
  90. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1 -1
  91. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +2 -3
  92. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -21
  93. package/deps/rocksdb/rocksdb/db/db_test.cc +644 -128
  94. package/deps/rocksdb/rocksdb/db/db_test2.cc +198 -81
  95. package/deps/rocksdb/rocksdb/db/db_test_util.cc +35 -10
  96. package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -2
  97. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +36 -32
  98. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +11 -7
  99. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +499 -0
  100. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +284 -20
  101. package/deps/rocksdb/rocksdb/db/db_write_test.cc +3 -3
  102. package/deps/rocksdb/rocksdb/db/dbformat.h +0 -5
  103. package/deps/rocksdb/rocksdb/db/error_handler.cc +24 -0
  104. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +12 -14
  105. package/deps/rocksdb/rocksdb/db/experimental.cc +13 -10
  106. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -1
  107. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +22 -3
  108. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +21 -15
  109. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +4 -6
  110. package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -3
  111. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +5 -6
  112. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +4 -2
  113. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +17 -17
  114. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -0
  115. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  116. package/deps/rocksdb/rocksdb/db/listener_test.cc +154 -27
  117. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -6
  118. package/deps/rocksdb/rocksdb/db/memtable.cc +197 -51
  119. package/deps/rocksdb/rocksdb/db/memtable.h +6 -0
  120. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +3 -4
  121. package/deps/rocksdb/rocksdb/db/merge_test.cc +37 -35
  122. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +2 -1
  123. package/deps/rocksdb/rocksdb/db/options_file_test.cc +4 -4
  124. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +9 -11
  125. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +10 -1
  126. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +292 -15
  127. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +10 -17
  128. package/deps/rocksdb/rocksdb/db/prefix_test.cc +6 -8
  129. package/deps/rocksdb/rocksdb/db/repair.cc +10 -10
  130. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -5
  131. package/deps/rocksdb/rocksdb/db/table_cache.cc +142 -135
  132. package/deps/rocksdb/rocksdb/db/table_cache.h +30 -6
  133. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -7
  134. package/deps/rocksdb/rocksdb/db/version_builder.cc +11 -50
  135. package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
  136. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +2 -1
  137. package/deps/rocksdb/rocksdb/db/version_edit.cc +51 -2
  138. package/deps/rocksdb/rocksdb/db/version_edit.h +91 -29
  139. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -7
  140. package/deps/rocksdb/rocksdb/db/version_set.cc +211 -50
  141. package/deps/rocksdb/rocksdb/db/version_set.h +40 -3
  142. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
  143. package/deps/rocksdb/rocksdb/db/version_set_test.cc +294 -21
  144. package/deps/rocksdb/rocksdb/db/version_util.cc +96 -0
  145. package/deps/rocksdb/rocksdb/db/version_util.h +24 -0
  146. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +5 -5
  147. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +647 -31
  148. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +219 -1
  149. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +549 -12
  150. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -3
  151. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +1 -1
  152. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -0
  153. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +21 -4
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +32 -0
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +74 -22
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +9 -0
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -61
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -2
  159. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +76 -2
  160. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +92 -72
  161. package/deps/rocksdb/rocksdb/env/env.cc +1 -0
  162. package/deps/rocksdb/rocksdb/env/env_test.cc +365 -2
  163. package/deps/rocksdb/rocksdb/env/fs_posix.cc +31 -30
  164. package/deps/rocksdb/rocksdb/env/io_posix.cc +8 -11
  165. package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
  166. package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
  167. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -1
  168. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +108 -0
  169. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +32 -4
  170. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +4 -4
  171. package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
  172. package/deps/rocksdb/rocksdb/file/file_util.h +2 -1
  173. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +331 -12
  174. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +52 -35
  175. package/deps/rocksdb/rocksdb/folly.mk +22 -5
  176. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +1 -1
  177. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +100 -54
  178. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +67 -2
  179. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +149 -13
  180. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -12
  181. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +78 -97
  182. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +3 -3
  183. package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +2 -2
  184. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +5 -0
  185. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +17 -2
  186. package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +1 -1
  187. package/deps/rocksdb/rocksdb/include/rocksdb/io_dispatcher.h +358 -0
  188. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +13 -0
  189. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +43 -0
  190. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +20 -0
  191. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +63 -21
  192. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +10 -1
  193. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +1 -1
  194. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +2 -7
  195. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +13 -0
  196. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -14
  197. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +49 -9
  198. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +8 -0
  199. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +77 -6
  200. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +15 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +16 -10
  202. package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +5 -5
  203. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +2 -4
  204. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +106 -46
  205. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +1 -1
  206. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +14 -1
  207. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +5 -1
  208. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +2 -1
  209. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -9
  210. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  211. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -2
  212. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +2 -2
  213. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +226 -8
  214. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +490 -0
  215. package/deps/rocksdb/rocksdb/memtable/skiplist.h +3 -3
  216. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +11 -0
  217. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +4 -12
  218. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +5 -5
  219. package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +21 -4
  220. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +9 -3
  221. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +21 -2
  222. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +2 -2
  223. package/deps/rocksdb/rocksdb/options/cf_options.cc +21 -1
  224. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  225. package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -2
  226. package/deps/rocksdb/rocksdb/options/db_options.cc +26 -5
  227. package/deps/rocksdb/rocksdb/options/db_options.h +3 -1
  228. package/deps/rocksdb/rocksdb/options/options.cc +5 -1
  229. package/deps/rocksdb/rocksdb/options/options_helper.cc +7 -2
  230. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +109 -103
  231. package/deps/rocksdb/rocksdb/options/options_test.cc +14 -0
  232. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +15 -17
  233. package/deps/rocksdb/rocksdb/port/lang.h +4 -0
  234. package/deps/rocksdb/rocksdb/port/port_example.h +0 -23
  235. package/deps/rocksdb/rocksdb/port/stack_trace.cc +36 -0
  236. package/deps/rocksdb/rocksdb/port/stack_trace.h +9 -0
  237. package/deps/rocksdb/rocksdb/src.mk +12 -0
  238. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +1 -2
  239. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  240. package/deps/rocksdb/rocksdb/table/block_based/block.cc +571 -292
  241. package/deps/rocksdb/rocksdb/table/block_based/block.h +143 -53
  242. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +154 -90
  243. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +5 -1
  244. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +51 -14
  245. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +0 -2
  246. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +147 -734
  247. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +30 -233
  248. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +178 -108
  249. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +13 -0
  250. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +17 -4
  251. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -2
  252. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +70 -0
  253. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +168 -24
  254. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -9
  255. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +7 -4
  256. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +9 -2
  257. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +548 -169
  258. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
  259. package/deps/rocksdb/rocksdb/table/block_based/block_util.h +156 -0
  260. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +73 -30
  261. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +74 -7
  262. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +1 -1
  263. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +20 -14
  264. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +22 -12
  265. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +1 -1
  266. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.cc +332 -0
  267. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.h +133 -0
  268. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -2
  269. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
  270. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +3 -2
  271. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +4 -1
  272. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +0 -1
  273. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +126 -46
  274. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +31 -3
  275. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +1 -2
  276. package/deps/rocksdb/rocksdb/table/cleanable_test.cc +3 -1
  277. package/deps/rocksdb/rocksdb/table/external_table.cc +25 -4
  278. package/deps/rocksdb/rocksdb/table/format.cc +27 -15
  279. package/deps/rocksdb/rocksdb/table/format.h +41 -15
  280. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -0
  281. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +22 -12
  282. package/deps/rocksdb/rocksdb/table/meta_blocks.h +0 -1
  283. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +7 -21
  284. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
  285. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +88 -13
  286. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +53 -42
  287. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +3 -12
  288. package/deps/rocksdb/rocksdb/table/table_builder.h +0 -4
  289. package/deps/rocksdb/rocksdb/table/table_properties.cc +18 -0
  290. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -3
  291. package/deps/rocksdb/rocksdb/table/table_test.cc +848 -172
  292. package/deps/rocksdb/rocksdb/table/unique_id.cc +24 -20
  293. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +8 -8
  294. package/deps/rocksdb/rocksdb/test_util/sync_point.h +5 -4
  295. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -1
  296. package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -2
  297. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +2 -1
  298. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +238 -120
  299. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +2 -2
  300. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +2 -4
  301. package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +4 -8
  302. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +1 -1
  303. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +2 -3
  304. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +82 -20
  305. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +41 -47
  306. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -0
  307. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +5 -6
  308. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +1 -1
  309. package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +6 -5
  310. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +4 -4
  311. package/deps/rocksdb/rocksdb/tools/write_stress.cc +1 -3
  312. package/deps/rocksdb/rocksdb/util/atomic.h +30 -23
  313. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +6 -7
  314. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +3 -3
  315. package/deps/rocksdb/rocksdb/util/bit_fields.h +68 -46
  316. package/deps/rocksdb/rocksdb/util/bloom_impl.h +16 -16
  317. package/deps/rocksdb/rocksdb/util/coding.h +14 -27
  318. package/deps/rocksdb/rocksdb/util/compression.cc +365 -207
  319. package/deps/rocksdb/rocksdb/util/compression.h +16 -1298
  320. package/deps/rocksdb/rocksdb/util/compression_test.cc +347 -61
  321. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +8 -9
  322. package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +1 -1
  323. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +1 -1
  324. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +3 -3
  325. package/deps/rocksdb/rocksdb/util/filter_bench.cc +18 -18
  326. package/deps/rocksdb/rocksdb/util/gflags_compat.h +3 -3
  327. package/deps/rocksdb/rocksdb/util/hash_test.cc +19 -7
  328. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.cc +1099 -0
  329. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.h +36 -0
  330. package/deps/rocksdb/rocksdb/util/io_dispatcher_test.cc +1919 -0
  331. package/deps/rocksdb/rocksdb/util/math.h +3 -1
  332. package/deps/rocksdb/rocksdb/util/mutexlock.h +19 -19
  333. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +25 -25
  334. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +5 -7
  335. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -5
  336. package/deps/rocksdb/rocksdb/util/slice.cc +0 -10
  337. package/deps/rocksdb/rocksdb/util/slice_test.cc +35 -1
  338. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +5 -7
  339. package/deps/rocksdb/rocksdb/util/status.cc +3 -1
  340. package/deps/rocksdb/rocksdb/util/stop_watch.h +2 -0
  341. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -1
  342. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +123 -78
  343. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +12 -93
  344. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -4
  345. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +0 -21
  346. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +6 -48
  347. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +94 -307
  348. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +12 -58
  349. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +2 -8
  350. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +2 -3
  351. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +205 -811
  352. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +18 -9
  353. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +2 -7
  354. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +1 -9
  355. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +17 -11
  356. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +1 -1
  357. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +1 -1
  358. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
  359. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +68 -61
  360. package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -1
  361. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +105 -59
  362. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -7
  363. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs_test.cc +94 -0
  364. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +13 -17
  365. package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +16 -3
  366. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +25 -25
  367. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +40 -40
  368. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +2 -5
  369. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +17 -19
  370. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +2 -2
  371. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +2 -2
  372. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +1 -1
  373. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +2 -2
  374. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +4 -13
  375. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +3 -3
  376. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
  377. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_seqno_test.cc +431 -0
  378. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -2
  379. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +91 -0
  380. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.cc +562 -0
  381. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.h +615 -0
  382. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.cc +2575 -0
  383. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.h +685 -0
  384. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_db_test.cc +2843 -0
  385. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.cc +567 -0
  386. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.h +275 -0
  387. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_test.cc +5183 -0
  388. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +4 -3
  389. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  390. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
  391. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +3 -3
  392. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +93 -88
  393. package/deps/rocksdb/rocksdb.gyp +7 -0
  394. package/index.js +70 -10
  395. package/iterator.js +25 -3
  396. package/max_rev_operator.h +9 -5
  397. package/package.json +1 -1
  398. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  399. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  400. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +0 -43
  401. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +0 -55
@@ -155,8 +155,11 @@ class BlockReadAmpBitmap {
155
155
  class Block {
156
156
  public:
157
157
  // Initialize the block with the specified contents.
158
+ // If restart_interval is provided (non-zero), it will be stored directly
159
+ // instead of being calculated later.
158
160
  explicit Block(BlockContents&& contents, size_t read_amp_bytes_per_bit = 0,
159
- Statistics* statistics = nullptr);
161
+ Statistics* statistics = nullptr,
162
+ uint32_t restart_interval = 1);
160
163
  // No copying allowed
161
164
  Block(const Block&) = delete;
162
165
  void operator=(const Block&) = delete;
@@ -167,8 +170,9 @@ class Block {
167
170
  const char* data() const { return contents_.data.data(); }
168
171
  // The additional memory space taken by the block data.
169
172
  size_t usable_size() const { return contents_.usable_size(); }
170
- uint32_t NumRestarts() const;
173
+ uint32_t NumRestarts() const { return num_restarts_; }
171
174
  bool own_bytes() const { return contents_.own_bytes(); }
175
+ bool IsUniform() const { return is_uniform_; }
172
176
 
173
177
  BlockBasedTableOptions::DataBlockIndexType IndexType() const;
174
178
 
@@ -233,13 +237,19 @@ class Block {
233
237
  // It is determined by IndexType property of the table.
234
238
  // `user_defined_timestamps_persisted` controls whether a min timestamp is
235
239
  // padded while key is being parsed from the block.
240
+ // `index_block_search_type` controls which search algorithm to use when
241
+ // reading the index block. kBinary uses binary search, while
242
+ // kInterpolation uses interpolation search which can be faster
243
+ // for uniformly distributed keys.
236
244
  IndexBlockIter* NewIndexIterator(
237
245
  const Comparator* raw_ucmp, SequenceNumber global_seqno,
238
246
  IndexBlockIter* iter, Statistics* stats, bool total_order_seek,
239
247
  bool have_first_key, bool key_includes_seq, bool value_is_full,
240
248
  bool block_contents_pinned = false,
241
249
  bool user_defined_timestamps_persisted = true,
242
- BlockPrefixIndex* prefix_index = nullptr);
250
+ BlockPrefixIndex* prefix_index = nullptr,
251
+ BlockBasedTableOptions::BlockSearchType index_block_search_type =
252
+ BlockBasedTableOptions::kBinary);
243
253
 
244
254
  // Report an approximation of how much memory has been used.
245
255
  size_t ApproximateMemoryUsage() const;
@@ -273,12 +283,22 @@ class Block {
273
283
  ProtectionInfo64().ProtectKV(key, value).Encode(checksum_len, checksum_ptr);
274
284
  }
275
285
 
286
+ bool HasSeparatedKV() const { return values_section_ != nullptr; }
287
+
276
288
  const char* TEST_GetKVChecksum() const { return kv_checksum_; }
277
289
 
278
290
  private:
291
+ // Returns a detailed error status by re-processing the footer.
292
+ // Should only be called when size() == 0 (error marker).
293
+ Status GetCorruptionStatus() const;
294
+
279
295
  BlockContents contents_;
280
- uint32_t restart_offset_; // Offset in data_ of restart array
296
+ // Normal state: offset in data_ of restart array.
297
+ // Error state (size()==0): original data size if footer decode failed,
298
+ // otherwise 0. Used by GetCorruptionStatus() to re-decode footer.
299
+ uint32_t restart_offset_;
281
300
  uint32_t num_restarts_;
301
+ bool is_uniform_{false};
282
302
  std::unique_ptr<BlockReadAmpBitmap> read_amp_bitmap_;
283
303
  char* kv_checksum_{nullptr};
284
304
  uint32_t checksum_size_{0};
@@ -286,6 +306,9 @@ class Block {
286
306
  uint32_t block_restart_interval_{0};
287
307
  uint8_t protection_bytes_per_key_{0};
288
308
  DataBlockHashIndex data_block_hash_index_;
309
+
310
+ // Pointer to values section, nullptr if not using separated KV
311
+ const char* values_section_{nullptr};
289
312
  };
290
313
 
291
314
  // A `BlockIter` iterates over the entries in a `Block`'s data buffer. The
@@ -325,7 +348,7 @@ class BlockIter : public InternalIteratorBase<TValue> {
325
348
  assert(!pinned_iters_mgr_ || !pinned_iters_mgr_->PinningEnabled());
326
349
 
327
350
  data_ = nullptr;
328
- current_ = restarts_;
351
+ current_ = GetKeysEndOffset();
329
352
  status_ = s;
330
353
 
331
354
  // Call cleanup callbacks.
@@ -334,8 +357,10 @@ class BlockIter : public InternalIteratorBase<TValue> {
334
357
 
335
358
  bool Valid() const override {
336
359
  // When status_ is not ok, iter should be invalid.
337
- assert(status_.ok() || current_ >= restarts_);
338
- return current_ < restarts_;
360
+ auto key_end = GetKeysEndOffset();
361
+ assert(status_.ok() || current_ >= key_end);
362
+ auto valid = current_ < key_end;
363
+ return valid;
339
364
  }
340
365
 
341
366
  void SeekToFirst() override final {
@@ -426,10 +451,15 @@ class BlockIter : public InternalIteratorBase<TValue> {
426
451
  Cache::Handle* cache_handle() { return cache_handle_; }
427
452
 
428
453
  protected:
429
- std::unique_ptr<InternalKeyComparator> icmp_;
454
+ InternalKeyComparator icmp_;
430
455
  const char* data_; // underlying block contents
431
456
  uint32_t num_restarts_; // Number of uint32_t entries in restart array
432
457
 
458
+ const char* values_section_;
459
+ // Slice of current entry in the data section. Does not contain value if
460
+ // values_section_ exists
461
+ Slice entry_;
462
+
433
463
  // Index of restart block in which current_ or current_-1 falls
434
464
  uint32_t restart_index_;
435
465
  uint32_t restarts_; // Offset of restart array (list of fixed32)
@@ -460,6 +490,8 @@ class BlockIter : public InternalIteratorBase<TValue> {
460
490
 
461
491
  // Per key-value checksum related states
462
492
  const char* kv_checksum_;
493
+ // Index of the next entry to be parsed (used for checksum verification
494
+ // and to determine if we're at a restart point for separated KV storage)
463
495
  int32_t cur_entry_idx_;
464
496
  uint32_t block_restart_interval_;
465
497
  uint8_t protection_bytes_per_key_;
@@ -502,7 +534,11 @@ class BlockIter : public InternalIteratorBase<TValue> {
502
534
  uint32_t count = (num_restarts_ - 1) * block_restart_interval;
503
535
  // Add number of keys from the last restart interval
504
536
  SeekToRestartPoint(num_restarts_ - 1);
505
- while (NextEntryOffset() < restarts_ && status_.ok()) {
537
+ // For separated KV storage, keys end at values_section_, not at restarts_
538
+ uint32_t keys_end = values_section_
539
+ ? static_cast<uint32_t>(values_section_ - data_)
540
+ : restarts_;
541
+ while (NextEntryOffset() < keys_end && status_.ok()) {
506
542
  NextImpl();
507
543
  ++count;
508
544
  }
@@ -514,7 +550,7 @@ class BlockIter : public InternalIteratorBase<TValue> {
514
550
  // Sets raw_key_, value_ to the current parsed key and value.
515
551
  // Sets restart_index_ to point to the restart interval that contains
516
552
  // the current key.
517
- template <typename DecodeEntryFunc>
553
+ template <typename DecodeEntryFunc, bool StrictCheck = false>
518
554
  inline bool ParseNextKey(bool* is_shared);
519
555
 
520
556
  // protection_bytes_per_key, kv_checksum, and block_restart_interval
@@ -523,22 +559,20 @@ class BlockIter : public InternalIteratorBase<TValue> {
523
559
  uint32_t restarts, uint32_t num_restarts,
524
560
  SequenceNumber global_seqno, bool block_contents_pinned,
525
561
  bool user_defined_timestamp_persisted,
526
-
527
562
  uint8_t protection_bytes_per_key, const char* kv_checksum,
528
- uint32_t block_restart_interval) {
563
+ uint32_t block_restart_interval,
564
+ const char* values_section) {
529
565
  assert(data_ == nullptr); // Ensure it is called only once
530
566
  assert(num_restarts > 0); // Ensure the param is valid
531
-
532
- icmp_ = std::make_unique<InternalKeyComparator>(raw_ucmp);
567
+ assert(raw_ucmp != nullptr);
568
+ icmp_ = InternalKeyComparator(raw_ucmp);
533
569
  data_ = data;
534
570
  restarts_ = restarts;
535
571
  num_restarts_ = num_restarts;
536
- current_ = restarts_;
537
572
  restart_index_ = num_restarts_;
573
+ entry_ = Slice();
538
574
  global_seqno_ = global_seqno;
539
- if (raw_ucmp != nullptr) {
540
- ts_sz_ = raw_ucmp->timestamp_size();
541
- }
575
+ ts_sz_ = raw_ucmp->timestamp_size();
542
576
  pad_min_timestamp_ = ts_sz_ > 0 && !user_defined_timestamp_persisted;
543
577
  block_contents_pinned_ = block_contents_pinned;
544
578
  cache_handle_ = nullptr;
@@ -552,10 +586,13 @@ class BlockIter : public InternalIteratorBase<TValue> {
552
586
  assert((protection_bytes_per_key == 0 && kv_checksum == nullptr) ||
553
587
  (protection_bytes_per_key > 0 && kv_checksum != nullptr &&
554
588
  (block_restart_interval > 0 || num_restarts == 1)));
589
+
590
+ values_section_ = values_section;
591
+ current_ = GetKeysEndOffset();
555
592
  }
556
593
 
557
594
  void CorruptionError(const std::string& error_msg = "bad entry in block") {
558
- current_ = restarts_;
595
+ current_ = GetKeysEndOffset();
559
596
  restart_index_ = num_restarts_;
560
597
  status_ = Status::Corruption(error_msg);
561
598
  raw_key_.Clear();
@@ -571,14 +608,18 @@ class BlockIter : public InternalIteratorBase<TValue> {
571
608
  CorruptionError(error_msg);
572
609
  }
573
610
 
574
- void UpdateRawKeyAndMaybePadMinTimestamp(const Slice& key) {
611
+ void UpdateRawKeyAndMaybePadMinTimestamp(IterKey& raw_key, const Slice& key) {
575
612
  if (pad_min_timestamp_) {
576
- raw_key_.SetKeyWithPaddedMinTimestamp(key, ts_sz_);
613
+ raw_key.SetKeyWithPaddedMinTimestamp(key, ts_sz_);
577
614
  } else {
578
- raw_key_.SetKey(key, false /* copy */);
615
+ raw_key.SetKey(key, false /* copy */);
579
616
  }
580
617
  }
581
618
 
619
+ void UpdateRawKeyAndMaybePadMinTimestamp(const Slice& key) {
620
+ UpdateRawKeyAndMaybePadMinTimestamp(raw_key_, key);
621
+ }
622
+
582
623
  // Must be called every time a key is found that needs to be returned to user,
583
624
  // and may be called when no key is found (as a no-op). Updates `key_`,
584
625
  // `key_buf_`, and `key_pinned_` with info about the found key.
@@ -618,18 +659,31 @@ class BlockIter : public InternalIteratorBase<TValue> {
618
659
  }
619
660
  }
620
661
 
621
- // Returns the result of `Comparator::Compare()`, where the appropriate
622
- // comparator is used for the block contents, the LHS argument is the current
623
- // key with global seqno applied, and the RHS argument is `other`.
624
- int CompareCurrentKey(const Slice& other) {
662
+ // Compares two keys using the appropriate comparator for the block contents.
663
+ // Uses user comparator when the block stores user keys, otherwise uses the
664
+ // internal key comparator. When global_seqno is not disabled, applies it to
665
+ // the LHS key for comparison.
666
+ int CompareKey(const Slice& a, const Slice& b) const {
667
+ assert(icmp_.user_comparator() != nullptr);
625
668
  if (raw_key_.IsUserKey()) {
626
669
  assert(global_seqno_ == kDisableGlobalSequenceNumber);
627
- return icmp_->user_comparator()->Compare(raw_key_.GetUserKey(), other);
670
+ return icmp_.user_comparator()->Compare(a, b);
628
671
  } else if (global_seqno_ == kDisableGlobalSequenceNumber) {
629
- return icmp_->Compare(raw_key_.GetInternalKey(), other);
672
+ return icmp_.Compare(a, b);
673
+ }
674
+ return icmp_.Compare(a, global_seqno_, b, kDisableGlobalSequenceNumber);
675
+ }
676
+
677
+ int CompareKey(const IterKey& a, const Slice& b) const {
678
+ if (a.IsUserKey()) {
679
+ return CompareKey(a.GetUserKey(), b);
630
680
  }
631
- return icmp_->Compare(raw_key_.GetInternalKey(), global_seqno_, other,
632
- kDisableGlobalSequenceNumber);
681
+ return CompareKey(a.GetInternalKey(), b);
682
+ }
683
+
684
+ // Compares the current key (with global seqno applied) against `other`.
685
+ int CompareCurrentKey(const Slice& other) const {
686
+ return CompareKey(raw_key_, other);
633
687
  }
634
688
 
635
689
  private:
@@ -644,28 +698,49 @@ class BlockIter : public InternalIteratorBase<TValue> {
644
698
  // Return the offset in data_ just past the end of the current entry.
645
699
  inline uint32_t NextEntryOffset() const {
646
700
  // NOTE: We don't support blocks bigger than 2GB
647
- return static_cast<uint32_t>((value_.data() + value_.size()) - data_);
701
+ return static_cast<uint32_t>((entry_.data() + entry_.size()) - data_);
702
+ }
703
+
704
+ // Return the offset where the keys section ends.
705
+ // For separated KV storage, this is the start of the values section.
706
+ // Otherwise, it's the start of the restart array.
707
+ inline uint32_t GetKeysEndOffset() const {
708
+ return values_section_ ? static_cast<uint32_t>(values_section_ - data_)
709
+ : restarts_;
648
710
  }
649
711
 
650
712
  uint32_t GetRestartPoint(uint32_t index) const {
651
713
  assert(index < num_restarts_);
652
- return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t));
714
+ uint32_t offset =
715
+ DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t));
716
+ assert(!values_section_ || offset <= values_section_ - data_);
717
+ return offset;
653
718
  }
654
719
 
655
720
  void SeekToRestartPoint(uint32_t index) {
656
721
  raw_key_.Clear();
657
722
  restart_index_ = index;
658
- // current_ will be fixed by ParseNextKey();
723
+ // Set to one before the first entry so ParseNextKey() increments to correct
724
+ // position
725
+ cur_entry_idx_ = static_cast<int32_t>(index * block_restart_interval_) - 1;
659
726
 
660
- // ParseNextKey() starts at the end of value_, so set value_ accordingly
727
+ // ParseNextKey() starts at the end of entry_, so set value_ accordingly
661
728
  uint32_t offset = GetRestartPoint(index);
662
- value_ = Slice(data_ + offset, 0);
729
+ entry_ = Slice(data_ + offset, 0);
663
730
  }
664
731
 
665
732
  protected:
666
733
  template <typename DecodeKeyFunc>
667
- inline bool BinarySeek(const Slice& target, uint32_t* index,
668
- bool* is_index_key_result);
734
+ inline bool GetRestartKey(uint32_t index, Slice* key);
735
+
736
+ template <typename DecodeKeyFunc>
737
+ inline bool BinarySeekRestartPointIndex(const Slice& target, uint32_t* index,
738
+ bool* is_index_key_result);
739
+
740
+ template <typename DecodeKeyFunc>
741
+ inline bool InterpolationSeekRestartPointIndex(const Slice& target,
742
+ uint32_t* index,
743
+ bool* is_index_key_result);
669
744
 
670
745
  // Find the first key in restart interval `index` that is >= `target`.
671
746
  // If there is no such key, iterator is positioned at the first key in
@@ -692,11 +767,11 @@ class DataBlockIter final : public BlockIter<Slice> {
692
767
  bool user_defined_timestamps_persisted,
693
768
  DataBlockHashIndex* data_block_hash_index,
694
769
  uint8_t protection_bytes_per_key, const char* kv_checksum,
695
- uint32_t block_restart_interval) {
770
+ uint32_t block_restart_interval, const char* values_section) {
696
771
  InitializeBase(raw_ucmp, data, restarts, num_restarts, global_seqno,
697
772
  block_contents_pinned, user_defined_timestamps_persisted,
698
773
  protection_bytes_per_key, kv_checksum,
699
- block_restart_interval);
774
+ block_restart_interval, values_section);
700
775
  raw_key_.SetIsUserKey(false);
701
776
  read_amp_bitmap_ = read_amp_bitmap;
702
777
  last_bitmap_offset_ = current_ + 1;
@@ -753,9 +828,11 @@ class DataBlockIter final : public BlockIter<Slice> {
753
828
  // last `current_` value we report to read-amp bitmap
754
829
  mutable uint32_t last_bitmap_offset_;
755
830
  struct CachedPrevEntry {
756
- explicit CachedPrevEntry(uint32_t _offset, const char* _key_ptr,
757
- size_t _key_offset, size_t _key_size, Slice _value)
831
+ explicit CachedPrevEntry(uint32_t _offset, uint32_t _entry_size,
832
+ const char* _key_ptr, size_t _key_offset,
833
+ size_t _key_size, Slice _value)
758
834
  : offset(_offset),
835
+ entry_size(_entry_size),
759
836
  key_ptr(_key_ptr),
760
837
  key_offset(_key_offset),
761
838
  key_size(_key_size),
@@ -763,6 +840,8 @@ class DataBlockIter final : public BlockIter<Slice> {
763
840
 
764
841
  // offset of entry in block
765
842
  uint32_t offset;
843
+ // size of entry (for NextEntryOffset calculation)
844
+ uint32_t entry_size;
766
845
  // Pointer to key data in block (nullptr if key is delta-encoded)
767
846
  const char* key_ptr;
768
847
  // offset of key in prev_entries_keys_buff_ (0 if key_ptr is not nullptr)
@@ -791,14 +870,15 @@ class MetaBlockIter final : public BlockIter<Slice> {
791
870
  MetaBlockIter() : BlockIter() { raw_key_.SetIsUserKey(true); }
792
871
  void Initialize(const char* data, uint32_t restarts, uint32_t num_restarts,
793
872
  bool block_contents_pinned, uint8_t protection_bytes_per_key,
794
- const char* kv_checksum, uint32_t block_restart_interval) {
873
+ const char* kv_checksum, uint32_t block_restart_interval,
874
+ const char* values_section) {
795
875
  // Initializes the iterator with a BytewiseComparator and
796
876
  // the raw key being a user key.
797
877
  InitializeBase(BytewiseComparator(), data, restarts, num_restarts,
798
878
  kDisableGlobalSequenceNumber, block_contents_pinned,
799
879
  /* user_defined_timestamps_persisted */ true,
800
880
  protection_bytes_per_key, kv_checksum,
801
- block_restart_interval);
881
+ block_restart_interval, values_section);
802
882
  raw_key_.SetIsUserKey(true);
803
883
  }
804
884
 
@@ -829,22 +909,24 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
829
909
  // format.
830
910
  // value_is_full, default true, means that no delta encoding is
831
911
  // applied to values.
832
- void Initialize(const Comparator* raw_ucmp, const char* data,
833
- uint32_t restarts, uint32_t num_restarts,
834
- SequenceNumber global_seqno, BlockPrefixIndex* prefix_index,
835
- bool have_first_key, bool key_includes_seq,
836
- bool value_is_full, bool block_contents_pinned,
837
- bool user_defined_timestamps_persisted,
838
- uint8_t protection_bytes_per_key, const char* kv_checksum,
839
- uint32_t block_restart_interval) {
912
+ void Initialize(
913
+ const Comparator* raw_ucmp, const char* data, uint32_t restarts,
914
+ uint32_t num_restarts, SequenceNumber global_seqno,
915
+ BlockPrefixIndex* prefix_index, bool have_first_key,
916
+ bool key_includes_seq, bool value_is_full, bool block_contents_pinned,
917
+ bool user_defined_timestamps_persisted, uint8_t protection_bytes_per_key,
918
+ const char* kv_checksum, uint32_t block_restart_interval,
919
+ const char* values_section,
920
+ BlockBasedTableOptions::BlockSearchType index_block_search_type) {
840
921
  InitializeBase(raw_ucmp, data, restarts, num_restarts,
841
922
  kDisableGlobalSequenceNumber, block_contents_pinned,
842
923
  user_defined_timestamps_persisted, protection_bytes_per_key,
843
- kv_checksum, block_restart_interval);
924
+ kv_checksum, block_restart_interval, values_section);
844
925
  raw_key_.SetIsUserKey(!key_includes_seq);
845
926
  prefix_index_ = prefix_index;
846
927
  value_delta_encoded_ = !value_is_full;
847
928
  have_first_key_ = have_first_key;
929
+ index_search_type_ = index_block_search_type;
848
930
  if (have_first_key_ && global_seqno != kDisableGlobalSequenceNumber) {
849
931
  global_seqno_state_.reset(new GlobalSeqnoState(global_seqno));
850
932
  } else {
@@ -894,7 +976,7 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
894
976
 
895
977
  void SeekForPrevImpl(const Slice&) override {
896
978
  assert(false);
897
- current_ = restarts_;
979
+ current_ = GetKeysEndOffset();
898
980
  restart_index_ = num_restarts_;
899
981
  status_ = Status::InvalidArgument(
900
982
  "RocksDB internal error: should never call SeekForPrev() on index "
@@ -939,6 +1021,10 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
939
1021
  // `pad_min_timestamp_` is true.
940
1022
  std::string first_internal_key_with_ts_;
941
1023
 
1024
+ // The search algorithm to use when reading the index block.
1025
+ BlockBasedTableOptions::BlockSearchType index_search_type_ =
1026
+ BlockBasedTableOptions::kBinary;
1027
+
942
1028
  // Set *prefix_may_exist to false if no key possibly share the same prefix
943
1029
  // as `target`. If not set, the result position should be the same as total
944
1030
  // order Seek.
@@ -951,6 +1037,10 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
951
1037
  bool* prefix_may_exist);
952
1038
  inline int CompareBlockKey(uint32_t block_index, const Slice& target);
953
1039
 
1040
+ template <typename DecodeKeyFunc>
1041
+ bool FindRestartPointForSeek(const Slice& seek_key, uint32_t* index,
1042
+ bool* skip_linear_scan);
1043
+
954
1044
  inline bool ParseNextIndexKey();
955
1045
 
956
1046
  // When value_delta_encoded_ is enabled it decodes the value which is assumed