@nxtedition/rocksdb 15.4.0 → 15.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (402)
  1. package/binding.cc +24 -19
  2. package/cache.js +1 -1
  3. package/chained-batch.js +12 -3
  4. package/deps/rocksdb/rocksdb/.clang-tidy +86 -0
  5. package/deps/rocksdb/rocksdb/BUCK +42 -0
  6. package/deps/rocksdb/rocksdb/CMakeLists.txt +11 -0
  7. package/deps/rocksdb/rocksdb/Makefile +59 -32
  8. package/deps/rocksdb/rocksdb/cache/cache.cc +0 -5
  9. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +9 -9
  10. package/deps/rocksdb/rocksdb/cache/cache_key.cc +3 -3
  11. package/deps/rocksdb/rocksdb/cache/cache_key.h +5 -5
  12. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +16 -16
  13. package/deps/rocksdb/rocksdb/cache/cache_test.cc +1 -1
  14. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +258 -294
  15. package/deps/rocksdb/rocksdb/cache/clock_cache.h +98 -49
  16. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -5
  17. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +2 -3
  18. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +18 -18
  19. package/deps/rocksdb/rocksdb/crash_test.mk +5 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -22
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +6 -1
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +14 -16
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +38 -26
  24. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
  25. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +101 -18
  26. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +12 -0
  27. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +6 -9
  28. package/deps/rocksdb/rocksdb/db/builder.cc +23 -0
  29. package/deps/rocksdb/rocksdb/db/builder.h +7 -0
  30. package/deps/rocksdb/rocksdb/db/c.cc +373 -57
  31. package/deps/rocksdb/rocksdb/db/c_test.c +101 -1
  32. package/deps/rocksdb/rocksdb/db/column_family.cc +31 -3
  33. package/deps/rocksdb/rocksdb/db/column_family_test.cc +10 -13
  34. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +35 -48
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +13 -5
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +201 -39
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -10
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +7 -7
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -455
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +19 -0
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +72 -9
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -10
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +405 -83
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +25 -1
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -10
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -0
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1410 -106
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -5
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +2 -1
  51. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +19 -10
  52. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +505 -45
  53. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
  54. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +9 -1
  55. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +4 -4
  56. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +7 -9
  57. package/deps/rocksdb/rocksdb/db/convenience.cc +4 -4
  58. package/deps/rocksdb/rocksdb/db/convenience_impl.h +2 -1
  59. package/deps/rocksdb/rocksdb/db/corruption_test.cc +60 -88
  60. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +10 -12
  61. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +471 -40
  62. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +116 -2
  63. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +5 -15
  64. package/deps/rocksdb/rocksdb/db/db_compaction_abort_test.cc +993 -0
  65. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +329 -29
  66. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +155 -13
  67. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +54 -31
  68. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -0
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +232 -70
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +57 -9
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +224 -31
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +4 -2
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +164 -8
  77. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -0
  78. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +5 -0
  79. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +47 -35
  80. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +22 -9
  81. package/deps/rocksdb/rocksdb/db/db_iter.cc +9 -0
  82. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +371 -6
  83. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -5
  84. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +22 -23
  85. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +0 -2
  86. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +4 -4
  87. package/deps/rocksdb/rocksdb/db/db_options_test.cc +40 -0
  88. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +32 -13
  89. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1 -1
  90. package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +4 -4
  91. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +68 -15
  92. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1 -1
  93. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +2 -3
  94. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -21
  95. package/deps/rocksdb/rocksdb/db/db_test.cc +644 -128
  96. package/deps/rocksdb/rocksdb/db/db_test2.cc +198 -81
  97. package/deps/rocksdb/rocksdb/db/db_test_util.cc +35 -10
  98. package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -2
  99. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +36 -32
  100. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +11 -7
  101. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +499 -0
  102. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +284 -20
  103. package/deps/rocksdb/rocksdb/db/db_write_test.cc +3 -3
  104. package/deps/rocksdb/rocksdb/db/dbformat.h +0 -5
  105. package/deps/rocksdb/rocksdb/db/error_handler.cc +24 -0
  106. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +12 -14
  107. package/deps/rocksdb/rocksdb/db/experimental.cc +13 -10
  108. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +22 -3
  110. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +21 -15
  111. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +4 -6
  112. package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -3
  113. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +5 -6
  114. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +4 -2
  115. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +17 -17
  116. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -0
  117. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  118. package/deps/rocksdb/rocksdb/db/listener_test.cc +154 -27
  119. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -6
  120. package/deps/rocksdb/rocksdb/db/memtable.cc +197 -51
  121. package/deps/rocksdb/rocksdb/db/memtable.h +6 -0
  122. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +3 -4
  123. package/deps/rocksdb/rocksdb/db/merge_test.cc +37 -35
  124. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +2 -1
  125. package/deps/rocksdb/rocksdb/db/options_file_test.cc +4 -4
  126. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +9 -11
  127. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +10 -1
  128. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +292 -15
  129. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +10 -17
  130. package/deps/rocksdb/rocksdb/db/prefix_test.cc +6 -8
  131. package/deps/rocksdb/rocksdb/db/repair.cc +10 -10
  132. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -5
  133. package/deps/rocksdb/rocksdb/db/table_cache.cc +142 -135
  134. package/deps/rocksdb/rocksdb/db/table_cache.h +30 -6
  135. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -7
  136. package/deps/rocksdb/rocksdb/db/version_builder.cc +11 -50
  137. package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
  138. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +2 -1
  139. package/deps/rocksdb/rocksdb/db/version_edit.cc +51 -2
  140. package/deps/rocksdb/rocksdb/db/version_edit.h +91 -29
  141. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -7
  142. package/deps/rocksdb/rocksdb/db/version_set.cc +211 -50
  143. package/deps/rocksdb/rocksdb/db/version_set.h +40 -3
  144. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
  145. package/deps/rocksdb/rocksdb/db/version_set_test.cc +294 -21
  146. package/deps/rocksdb/rocksdb/db/version_util.cc +96 -0
  147. package/deps/rocksdb/rocksdb/db/version_util.h +24 -0
  148. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +5 -5
  149. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +647 -31
  150. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +219 -1
  151. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +549 -12
  152. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -3
  153. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +1 -1
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -0
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +21 -4
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +32 -0
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +74 -22
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +9 -0
  159. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -61
  160. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -2
  161. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +76 -2
  162. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +92 -72
  163. package/deps/rocksdb/rocksdb/env/env.cc +1 -0
  164. package/deps/rocksdb/rocksdb/env/env_test.cc +365 -2
  165. package/deps/rocksdb/rocksdb/env/fs_posix.cc +31 -30
  166. package/deps/rocksdb/rocksdb/env/io_posix.cc +8 -11
  167. package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
  168. package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
  169. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -1
  170. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +108 -0
  171. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +32 -4
  172. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +4 -4
  173. package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
  174. package/deps/rocksdb/rocksdb/file/file_util.h +2 -1
  175. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +331 -12
  176. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +52 -35
  177. package/deps/rocksdb/rocksdb/folly.mk +22 -5
  178. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +1 -1
  179. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +100 -54
  180. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +67 -2
  181. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +149 -13
  182. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -12
  183. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +78 -97
  184. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +3 -3
  185. package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +2 -2
  186. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +5 -0
  187. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +17 -2
  188. package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +1 -1
  189. package/deps/rocksdb/rocksdb/include/rocksdb/io_dispatcher.h +358 -0
  190. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +13 -0
  191. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +43 -0
  192. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +20 -0
  193. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +63 -21
  194. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +10 -1
  195. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +1 -1
  196. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +2 -7
  197. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +13 -0
  198. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -14
  199. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +49 -9
  200. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +8 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +77 -6
  202. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +15 -0
  203. package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +16 -10
  204. package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +5 -5
  205. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +2 -4
  206. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +106 -46
  207. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +1 -1
  208. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +14 -1
  209. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +5 -1
  210. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +2 -1
  211. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -9
  212. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  213. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -2
  214. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +2 -2
  215. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +226 -8
  216. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +490 -0
  217. package/deps/rocksdb/rocksdb/memtable/skiplist.h +3 -3
  218. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +11 -0
  219. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +4 -12
  220. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +5 -5
  221. package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +21 -4
  222. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +9 -3
  223. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +21 -2
  224. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +2 -2
  225. package/deps/rocksdb/rocksdb/options/cf_options.cc +21 -1
  226. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  227. package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -2
  228. package/deps/rocksdb/rocksdb/options/db_options.cc +26 -5
  229. package/deps/rocksdb/rocksdb/options/db_options.h +3 -1
  230. package/deps/rocksdb/rocksdb/options/options.cc +5 -1
  231. package/deps/rocksdb/rocksdb/options/options_helper.cc +7 -2
  232. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +109 -103
  233. package/deps/rocksdb/rocksdb/options/options_test.cc +14 -0
  234. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +15 -17
  235. package/deps/rocksdb/rocksdb/port/lang.h +4 -0
  236. package/deps/rocksdb/rocksdb/port/port_example.h +0 -23
  237. package/deps/rocksdb/rocksdb/port/stack_trace.cc +36 -0
  238. package/deps/rocksdb/rocksdb/port/stack_trace.h +9 -0
  239. package/deps/rocksdb/rocksdb/src.mk +12 -0
  240. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +1 -2
  241. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  242. package/deps/rocksdb/rocksdb/table/block_based/block.cc +571 -292
  243. package/deps/rocksdb/rocksdb/table/block_based/block.h +143 -53
  244. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +154 -90
  245. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +5 -1
  246. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +51 -14
  247. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +0 -2
  248. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +147 -734
  249. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +30 -233
  250. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +178 -108
  251. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +13 -0
  252. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +17 -4
  253. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -2
  254. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +70 -0
  255. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +168 -24
  256. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -9
  257. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +7 -4
  258. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +9 -2
  259. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +548 -169
  260. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
  261. package/deps/rocksdb/rocksdb/table/block_based/block_util.h +156 -0
  262. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +73 -30
  263. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +74 -7
  264. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +1 -1
  265. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +20 -14
  266. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +22 -12
  267. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +1 -1
  268. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.cc +332 -0
  269. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.h +133 -0
  270. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -2
  271. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
  272. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +3 -2
  273. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +4 -1
  274. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +0 -1
  275. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +126 -46
  276. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +31 -3
  277. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +1 -2
  278. package/deps/rocksdb/rocksdb/table/cleanable_test.cc +3 -1
  279. package/deps/rocksdb/rocksdb/table/external_table.cc +25 -4
  280. package/deps/rocksdb/rocksdb/table/format.cc +27 -15
  281. package/deps/rocksdb/rocksdb/table/format.h +41 -15
  282. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -0
  283. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +22 -12
  284. package/deps/rocksdb/rocksdb/table/meta_blocks.h +0 -1
  285. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +7 -21
  286. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
  287. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +88 -13
  288. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +53 -42
  289. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +3 -12
  290. package/deps/rocksdb/rocksdb/table/table_builder.h +0 -4
  291. package/deps/rocksdb/rocksdb/table/table_properties.cc +18 -0
  292. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -3
  293. package/deps/rocksdb/rocksdb/table/table_test.cc +848 -172
  294. package/deps/rocksdb/rocksdb/table/unique_id.cc +24 -20
  295. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +8 -8
  296. package/deps/rocksdb/rocksdb/test_util/sync_point.h +5 -4
  297. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -1
  298. package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -2
  299. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +2 -1
  300. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +238 -120
  301. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +2 -2
  302. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +2 -4
  303. package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +4 -8
  304. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +1 -1
  305. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +2 -3
  306. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +82 -20
  307. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +41 -47
  308. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -0
  309. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +5 -6
  310. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +1 -1
  311. package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +6 -5
  312. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +4 -4
  313. package/deps/rocksdb/rocksdb/tools/write_stress.cc +1 -3
  314. package/deps/rocksdb/rocksdb/util/atomic.h +30 -23
  315. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +6 -7
  316. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +3 -3
  317. package/deps/rocksdb/rocksdb/util/bit_fields.h +68 -46
  318. package/deps/rocksdb/rocksdb/util/bloom_impl.h +16 -16
  319. package/deps/rocksdb/rocksdb/util/coding.h +14 -27
  320. package/deps/rocksdb/rocksdb/util/compression.cc +365 -207
  321. package/deps/rocksdb/rocksdb/util/compression.h +16 -1298
  322. package/deps/rocksdb/rocksdb/util/compression_test.cc +347 -61
  323. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +8 -9
  324. package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +1 -1
  325. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +1 -1
  326. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +3 -3
  327. package/deps/rocksdb/rocksdb/util/filter_bench.cc +18 -18
  328. package/deps/rocksdb/rocksdb/util/gflags_compat.h +3 -3
  329. package/deps/rocksdb/rocksdb/util/hash_test.cc +19 -7
  330. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.cc +1099 -0
  331. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.h +36 -0
  332. package/deps/rocksdb/rocksdb/util/io_dispatcher_test.cc +1919 -0
  333. package/deps/rocksdb/rocksdb/util/math.h +3 -1
  334. package/deps/rocksdb/rocksdb/util/mutexlock.h +19 -19
  335. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +25 -25
  336. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +5 -7
  337. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -5
  338. package/deps/rocksdb/rocksdb/util/slice.cc +0 -10
  339. package/deps/rocksdb/rocksdb/util/slice_test.cc +35 -1
  340. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +5 -7
  341. package/deps/rocksdb/rocksdb/util/status.cc +3 -1
  342. package/deps/rocksdb/rocksdb/util/stop_watch.h +2 -0
  343. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -1
  344. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +123 -78
  345. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +12 -93
  346. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -4
  347. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +0 -21
  348. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +6 -48
  349. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +94 -307
  350. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +12 -58
  351. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +2 -8
  352. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +2 -3
  353. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +205 -811
  354. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +18 -9
  355. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +2 -7
  356. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +1 -9
  357. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +17 -11
  358. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +1 -1
  359. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +1 -1
  360. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
  361. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +68 -61
  362. package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -1
  363. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +105 -59
  364. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -7
  365. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs_test.cc +94 -0
  366. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +13 -17
  367. package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +16 -3
  368. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +25 -25
  369. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +40 -40
  370. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +2 -5
  371. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +17 -19
  372. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +2 -2
  373. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +2 -2
  374. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +1 -1
  375. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +2 -2
  376. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +4 -13
  377. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +3 -3
  378. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
  379. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_seqno_test.cc +431 -0
  380. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -2
  381. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +91 -0
  382. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.cc +562 -0
  383. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.h +615 -0
  384. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.cc +2575 -0
  385. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.h +685 -0
  386. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_db_test.cc +2843 -0
  387. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.cc +567 -0
  388. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.h +275 -0
  389. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_test.cc +5183 -0
  390. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +4 -3
  391. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  392. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
  393. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +3 -3
  394. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +93 -88
  395. package/deps/rocksdb/rocksdb.gyp +7 -0
  396. package/index.js +11 -2
  397. package/iterator.js +15 -7
  398. package/package.json +1 -1
  399. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  400. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  401. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +0 -43
  402. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +0 -55
@@ -76,6 +76,7 @@
76
76
  #include "test_util/testutil.h"
77
77
  #include "util/coding.h"
78
78
  #include "util/compression.h"
79
+ #include "util/defer.h"
79
80
  #include "util/file_checksum_helper.h"
80
81
  #include "util/random.h"
81
82
  #include "util/string_util.h"
@@ -582,18 +583,16 @@ class DBConstructor : public Constructor {
582
583
  public:
583
584
  explicit DBConstructor(const Comparator* cmp)
584
585
  : Constructor(cmp), comparator_(cmp) {
585
- db_ = nullptr;
586
586
  NewDB();
587
587
  }
588
- ~DBConstructor() override { delete db_; }
588
+ ~DBConstructor() override {}
589
589
  Status FinishImpl(const Options& /*options*/,
590
590
  const ImmutableOptions& /*ioptions*/,
591
591
  const MutableCFOptions& /*moptions*/,
592
592
  const BlockBasedTableOptions& /*table_options*/,
593
593
  const InternalKeyComparator& /*internal_comparator*/,
594
594
  const stl_wrappers::KVMap& kv_map) override {
595
- delete db_;
596
- db_ = nullptr;
595
+ db_.reset();
597
596
  NewDB();
598
597
  for (const auto& kv : kv_map) {
599
598
  WriteBatch batch;
@@ -608,7 +607,7 @@ class DBConstructor : public Constructor {
608
607
  return new InternalIteratorFromIterator(db_->NewIterator(ReadOptions()));
609
608
  }
610
609
 
611
- DB* db() const override { return db_; }
610
+ DB* db() const override { return db_.get(); }
612
611
 
613
612
  private:
614
613
  void NewDB() {
@@ -627,7 +626,7 @@ class DBConstructor : public Constructor {
627
626
  }
628
627
 
629
628
  const Comparator* comparator_;
630
- DB* db_;
629
+ std::unique_ptr<DB> db_;
631
630
  };
632
631
 
633
632
  enum TestType {
@@ -674,35 +673,6 @@ static std::vector<TestArgs> GenerateArgList() {
674
673
  std::vector<int> restart_intervals = {16, 1, 1024};
675
674
  std::vector<uint32_t> compression_parallel_threads = {1, 4};
676
675
 
677
- // Only add compression if it is supported
678
- std::vector<std::pair<CompressionType, bool>> compression_types;
679
- compression_types.emplace_back(kNoCompression, false);
680
- if (Snappy_Supported()) {
681
- compression_types.emplace_back(kSnappyCompression, false);
682
- }
683
- if (Zlib_Supported()) {
684
- compression_types.emplace_back(kZlibCompression, false);
685
- compression_types.emplace_back(kZlibCompression, true);
686
- }
687
- if (BZip2_Supported()) {
688
- compression_types.emplace_back(kBZip2Compression, false);
689
- compression_types.emplace_back(kBZip2Compression, true);
690
- }
691
- if (LZ4_Supported()) {
692
- compression_types.emplace_back(kLZ4Compression, false);
693
- compression_types.emplace_back(kLZ4Compression, true);
694
- compression_types.emplace_back(kLZ4HCCompression, false);
695
- compression_types.emplace_back(kLZ4HCCompression, true);
696
- }
697
- if (XPRESS_Supported()) {
698
- compression_types.emplace_back(kXpressCompression, false);
699
- compression_types.emplace_back(kXpressCompression, true);
700
- }
701
- if (ZSTD_Supported()) {
702
- compression_types.emplace_back(kZSTD, false);
703
- compression_types.emplace_back(kZSTD, true);
704
- }
705
-
706
676
  for (auto test_type : test_types) {
707
677
  for (auto reverse_compare : reverse_compare_types) {
708
678
  if (test_type == PLAIN_TABLE_SEMI_FIXED_PREFIX ||
@@ -713,9 +683,9 @@ static std::vector<TestArgs> GenerateArgList() {
713
683
  one_arg.type = test_type;
714
684
  one_arg.reverse_compare = reverse_compare;
715
685
  one_arg.restart_interval = restart_intervals[0];
716
- one_arg.compression = compression_types[0].first;
686
+ one_arg.compression = kNoCompression;
717
687
  one_arg.compression_parallel_threads = 1;
718
- one_arg.format_version = 0;
688
+ one_arg.format_version = 0; // Plain tables use their own versioning
719
689
  one_arg.use_mmap = true;
720
690
  test_args.push_back(one_arg);
721
691
  one_arg.use_mmap = false;
@@ -724,17 +694,20 @@ static std::vector<TestArgs> GenerateArgList() {
724
694
  }
725
695
 
726
696
  for (auto restart_interval : restart_intervals) {
727
- for (auto compression_type : compression_types) {
697
+ for (auto compression_type : GetSupportedCompressions()) {
728
698
  for (auto num_threads : compression_parallel_threads) {
729
- TestArgs one_arg;
730
- one_arg.type = test_type;
731
- one_arg.reverse_compare = reverse_compare;
732
- one_arg.restart_interval = restart_interval;
733
- one_arg.compression = compression_type.first;
734
- one_arg.compression_parallel_threads = num_threads;
735
- one_arg.format_version = compression_type.second ? 2 : 1;
736
- one_arg.use_mmap = false;
737
- test_args.push_back(one_arg);
699
+ // format_version = 7 changes some compression handling
700
+ for (uint32_t fv : {kMinSupportedBbtFormatVersionForRead, 7U}) {
701
+ TestArgs one_arg;
702
+ one_arg.type = test_type;
703
+ one_arg.reverse_compare = reverse_compare;
704
+ one_arg.restart_interval = restart_interval;
705
+ one_arg.compression = compression_type;
706
+ one_arg.compression_parallel_threads = num_threads;
707
+ one_arg.format_version = fv;
708
+ one_arg.use_mmap = false;
709
+ test_args.push_back(one_arg);
710
+ }
738
711
  }
739
712
  }
740
713
  }
@@ -767,9 +740,6 @@ class FixedOrLessPrefixTransform : public SliceTransform {
767
740
 
768
741
  bool InDomain(const Slice& /*src*/) const override { return true; }
769
742
 
770
- bool InRange(const Slice& dst) const override {
771
- return (dst.size() <= prefix_len_);
772
- }
773
743
  bool FullLengthEnabled(size_t* /*len*/) const override { return false; }
774
744
  };
775
745
 
@@ -1142,7 +1112,7 @@ class GeneralTableTest : public TableTest {};
1142
1112
  class BlockBasedTableTestBase : public TableTest {};
1143
1113
  class BlockBasedTableTest : public BlockBasedTableTestBase,
1144
1114
  virtual public ::testing::WithParamInterface<
1145
- std::tuple<uint32_t, size_t, size_t>> {
1115
+ std::tuple<uint32_t, size_t, size_t, bool>> {
1146
1116
  public:
1147
1117
  BlockBasedTableTest() : format_(std::get<0>(GetParam())) {
1148
1118
  env_ = Env::Default();
@@ -1154,6 +1124,8 @@ class BlockBasedTableTest : public BlockBasedTableTestBase,
1154
1124
  auto param = GetParam();
1155
1125
  options.super_block_alignment_size = std::get<1>(param);
1156
1126
  options.super_block_alignment_space_overhead_ratio = std::get<2>(param);
1127
+ // separate_key_value_in_data_block
1128
+ options.separate_key_value_in_data_block = std::get<3>(param);
1157
1129
  return options;
1158
1130
  }
1159
1131
 
@@ -1390,7 +1362,7 @@ INSTANTIATE_TEST_CASE_P(
1390
1362
  testing::Combine(testing::ValuesIn(test::kFooterFormatVersionsToTest),
1391
1363
  testing::Values(0, 128 * 1024, 512 * 1024,
1392
1364
  2 * 1024 * 1024),
1393
- testing::Values(2048, 32, 128)));
1365
+ testing::Values(2048, 32, 128), testing::Bool()));
1394
1366
 
1395
1367
  // This test serves as the living tutorial for the prefix scan of user collected
1396
1368
  // properties.
@@ -1760,7 +1732,13 @@ TEST_P(BlockBasedTableTest, BasicBlockBasedTableProperties) {
1760
1732
  ASSERT_EQ("", props.filter_policy_name); // no filter policy is used
1761
1733
 
1762
1734
  // Verify data size.
1763
- BlockBuilder block_builder(1);
1735
+ BlockBuilder block_builder(
1736
+ 1 /* block_restart_interval */, true /* use_delta_encoding */,
1737
+ false /* use_value_delta_encoding */,
1738
+ BlockBasedTableOptions::kDataBlockBinarySearch /* index_type */,
1739
+ 0.75 /* data_block_hash_table_util_ratio */, 0 /* ts_sz */,
1740
+ true /* persist_user_defined_timestamps */, false /* is_user_key */,
1741
+ table_options.separate_key_value_in_data_block);
1764
1742
  for (const auto& item : kvmap) {
1765
1743
  block_builder.Add(item.first, item.second);
1766
1744
  }
@@ -2305,6 +2283,44 @@ TEST_P(BlockBasedTableTest, BadChecksumType) {
2305
2283
  "Corruption: Corrupt or unsupported checksum type: 123 in test");
2306
2284
  }
2307
2285
 
2286
+ TEST_P(BlockBasedTableTest, ReservedBitInDataBlockFooter) {
2287
+ // Test that reserved metadata bits in data block footer are detected.
2288
+ // We construct a block directly rather than going through the full table
2289
+ // iterator path to avoid issues with iterator error handling.
2290
+
2291
+ // Build a simple data block
2292
+ BlockBuilder builder(16 /* restart_interval */);
2293
+ InternalKey key("abc", 1, kTypeValue);
2294
+ builder.Add(key.Encode(), "test_value");
2295
+ Slice block_contents = builder.Finish();
2296
+ std::string block_data = block_contents.ToString();
2297
+
2298
+ // The footer is the last 4 bytes - corrupt it by setting reserved bit 30
2299
+ ASSERT_GE(block_data.size(), sizeof(uint32_t));
2300
+ size_t footer_offset = block_data.size() - sizeof(uint32_t);
2301
+ uint32_t footer = DecodeFixed32(block_data.data() + footer_offset);
2302
+ footer |= (1u << 30); // Set a reserved bit
2303
+ EncodeFixed32(&block_data[footer_offset], footer);
2304
+
2305
+ // Try to construct a Block from the corrupted data
2306
+ BlockContents contents(std::move(block_data));
2307
+ Block block(std::move(contents), 0 /* read_amp_bytes_per_bit */);
2308
+
2309
+ // Block should have size() == 0 indicating error
2310
+ ASSERT_EQ(block.size(), 0u);
2311
+
2312
+ // Try to get an iterator - it should be invalid with corruption status
2313
+ DataBlockIter iter;
2314
+ block.NewDataIterator(BytewiseComparator(), kMaxSequenceNumber, &iter,
2315
+ /*stats=*/nullptr, /*block_contents_pinned=*/false);
2316
+ ASSERT_FALSE(iter.Valid());
2317
+ ASSERT_EQ(iter.status().code(), Status::kCorruption)
2318
+ << iter.status().ToString();
2319
+ ASSERT_NE(iter.status().ToString().find("reserved bits set"),
2320
+ std::string::npos)
2321
+ << iter.status().ToString();
2322
+ }
2323
+
2308
2324
  class BuiltinChecksumTest : public testing::Test,
2309
2325
  public testing::WithParamInterface<ChecksumType> {};
2310
2326
 
@@ -2676,9 +2692,18 @@ void TableTest::IndexTest(BlockBasedTableOptions table_options) {
2676
2692
  c.ResetTableReader();
2677
2693
  }
2678
2694
 
2679
- TEST_P(BlockBasedTableTest, BinaryIndexTest) {
2695
+ TEST_P(BlockBasedTableTest, BinaryIndexTestBinarySearch) {
2696
+ BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2697
+ table_options.index_type = BlockBasedTableOptions::kBinarySearch;
2698
+ table_options.index_block_search_type = BlockBasedTableOptions::kBinary;
2699
+ IndexTest(table_options);
2700
+ }
2701
+
2702
+ TEST_P(BlockBasedTableTest, BinaryIndexTestInterpolationSearch) {
2680
2703
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2681
2704
  table_options.index_type = BlockBasedTableOptions::kBinarySearch;
2705
+ table_options.index_block_search_type =
2706
+ BlockBasedTableOptions::kInterpolation;
2682
2707
  IndexTest(table_options);
2683
2708
  }
2684
2709
 
@@ -4306,14 +4331,26 @@ TEST_P(BlockBasedTableTest, BlockReadCountTest) {
4306
4331
  get_perf_context()->Reset();
4307
4332
  ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
4308
4333
  moptions.prefix_extractor.get()));
4334
+ const uint64_t total_classified_bytes =
4335
+ get_perf_context()->data_block_read_byte +
4336
+ get_perf_context()->index_block_read_byte +
4337
+ get_perf_context()->filter_block_read_byte +
4338
+ get_perf_context()->compression_dict_block_read_byte +
4339
+ get_perf_context()->metadata_block_read_byte;
4340
+ ASSERT_EQ(get_perf_context()->block_read_byte, total_classified_bytes);
4309
4341
  if (index_and_filter_in_cache) {
4310
4342
  // data, index and filter block
4311
4343
  ASSERT_EQ(get_perf_context()->block_read_count, 3);
4312
4344
  ASSERT_EQ(get_perf_context()->index_block_read_count, 1);
4313
4345
  ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
4346
+ ASSERT_GT(get_perf_context()->data_block_read_byte, 0);
4347
+ ASSERT_GT(get_perf_context()->index_block_read_byte, 0);
4348
+ ASSERT_GT(get_perf_context()->filter_block_read_byte, 0);
4314
4349
  } else {
4315
4350
  // just the data block
4316
4351
  ASSERT_EQ(get_perf_context()->block_read_count, 1);
4352
+ ASSERT_EQ(get_perf_context()->block_read_byte,
4353
+ get_perf_context()->data_block_read_byte);
4317
4354
  }
4318
4355
  ASSERT_EQ(get_context.State(), GetContext::kFound);
4319
4356
  ASSERT_STREQ(value.data(), "hello");
@@ -4332,6 +4369,13 @@ TEST_P(BlockBasedTableTest, BlockReadCountTest) {
4332
4369
  get_perf_context()->Reset();
4333
4370
  ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
4334
4371
  moptions.prefix_extractor.get()));
4372
+ const uint64_t total_classified_bytes =
4373
+ get_perf_context()->data_block_read_byte +
4374
+ get_perf_context()->index_block_read_byte +
4375
+ get_perf_context()->filter_block_read_byte +
4376
+ get_perf_context()->compression_dict_block_read_byte +
4377
+ get_perf_context()->metadata_block_read_byte;
4378
+ ASSERT_EQ(get_perf_context()->block_read_byte, total_classified_bytes);
4335
4379
  ASSERT_EQ(get_context.State(), GetContext::kNotFound);
4336
4380
  }
4337
4381
 
@@ -4345,6 +4389,8 @@ TEST_P(BlockBasedTableTest, BlockReadCountTest) {
4345
4389
  // with full-filter, we read filter first and then we stop
4346
4390
  ASSERT_EQ(get_perf_context()->block_read_count, 1);
4347
4391
  ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
4392
+ ASSERT_EQ(get_perf_context()->block_read_byte,
4393
+ get_perf_context()->filter_block_read_byte);
4348
4394
  }
4349
4395
  } else {
4350
4396
  // filter is already in memory and it figures out that the key doesn't
@@ -5002,30 +5048,11 @@ TEST(TableTest, FooterTests) {
5002
5048
  BlockHandle meta_index(data_size + index_size + 2 * 5, metaindex_size);
5003
5049
  uint64_t footer_offset = data_size + metaindex_size + index_size + 3 * 5;
5004
5050
  uint32_t base_context_checksum = 123456789;
5005
- {
5006
- // legacy block based
5007
- FooterBuilder footer;
5008
- ASSERT_OK(footer.Build(kBlockBasedTableMagicNumber, /* format_version */ 0,
5009
- footer_offset, kCRC32c, meta_index, index));
5010
- Footer decoded_footer;
5011
- ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset));
5012
- ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
5013
- ASSERT_EQ(decoded_footer.checksum_type(), kCRC32c);
5014
- ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
5015
- ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
5016
- ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
5017
- ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
5018
- ASSERT_EQ(decoded_footer.format_version(), 0U);
5019
- ASSERT_EQ(decoded_footer.base_context_checksum(), 0U);
5020
- ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 5U);
5021
- // Ensure serialized with legacy magic
5022
- ASSERT_EQ(
5023
- DecodeFixed64(footer.GetSlice().data() + footer.GetSlice().size() - 8),
5024
- kLegacyBlockBasedTableMagicNumber);
5025
- }
5026
- // block based, various checksums, various versions
5051
+ // block based, various checksums, various versions (format_version >= 2)
5027
5052
  for (auto t : GetSupportedChecksums()) {
5028
- for (uint32_t fv = 1; IsSupportedFormatVersion(fv); ++fv) {
5053
+ for (uint32_t fv = kMinSupportedBbtFormatVersionForWrite;
5054
+ IsSupportedFormatVersionForWrite(kBlockBasedTableMagicNumber, fv);
5055
+ ++fv) {
5029
5056
  uint32_t maybe_bcc =
5030
5057
  FormatVersionUsesContextChecksum(fv) ? base_context_checksum : 0U;
5031
5058
  FooterBuilder footer;
@@ -5072,41 +5099,154 @@ TEST(TableTest, FooterTests) {
5072
5099
  }
5073
5100
  }
5074
5101
 
5102
+ // plain table, various checksums, various versions (format_version >= 2)
5103
+ // Plain tables have no block trailer (size 0), so set up separate handles
5104
+ // Note: format_version >= 6 has complex footer checksum requirements,
5105
+ // so we only test format_version 2-5 for plain tables here
5075
5106
  {
5076
- // legacy plain table
5077
- FooterBuilder footer;
5078
- ASSERT_OK(footer.Build(kPlainTableMagicNumber, /* format_version */ 0,
5079
- footer_offset, kNoChecksum, meta_index));
5107
+ uint64_t plain_metaindex_size = r->Uniform(1000000);
5108
+ // For plain tables: metaindex is at offset 0, footer immediately follows
5109
+ BlockHandle plain_meta_index(0, plain_metaindex_size);
5110
+ uint64_t plain_footer_offset = plain_metaindex_size;
5111
+ for (auto t : GetSupportedChecksums()) {
5112
+ for (uint32_t fv = kMinSupportedBbtFormatVersionForWrite; fv < 6; ++fv) {
5113
+ FooterBuilder footer;
5114
+ ASSERT_OK(footer.Build(kPlainTableMagicNumber, fv, plain_footer_offset,
5115
+ t, plain_meta_index));
5116
+ Footer decoded_footer;
5117
+ ASSERT_OK(
5118
+ decoded_footer.DecodeFrom(footer.GetSlice(), plain_footer_offset));
5119
+ ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
5120
+ ASSERT_EQ(decoded_footer.checksum_type(), t);
5121
+ ASSERT_EQ(decoded_footer.metaindex_handle().offset(),
5122
+ plain_meta_index.offset());
5123
+ ASSERT_EQ(decoded_footer.metaindex_handle().size(),
5124
+ plain_meta_index.size());
5125
+ ASSERT_EQ(decoded_footer.format_version(), fv);
5126
+ ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U);
5127
+ }
5128
+ }
5129
+ }
5130
+ }
5131
+
5132
+ // Test that legacy SST formats (format_version < 2) are properly rejected
5133
+ TEST(TableTest, LegacyFormatRejectionTests) {
5134
+ // Temporarily disable unsupported format version allowance for this test
5135
+ bool& allow = TEST_AllowUnsupportedFormatVersion();
5136
+ SaveAndRestore<bool> saved_allow(&allow, false);
5137
+
5138
+ // Test legacy block-based magic number from LevelDB should be rejected
5139
+ {
5140
+ // Construct a fake footer with legacy block-based magic number
5141
+ std::array<char, Footer::kVersion0EncodedLength> fake_footer;
5142
+ std::fill(fake_footer.begin(), fake_footer.end(), 0);
5143
+ // Put legacy magic number at the end
5144
+ EncodeFixed64(fake_footer.data() + fake_footer.size() - 8,
5145
+ 0xdb4775248b80fb57ull /*legacy magic number*/);
5146
+
5080
5147
  Footer decoded_footer;
5081
- ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset));
5082
- ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
5083
- ASSERT_EQ(decoded_footer.checksum_type(), kCRC32c);
5084
- ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
5085
- ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
5086
- ASSERT_EQ(decoded_footer.index_handle().offset(), 0U);
5087
- ASSERT_EQ(decoded_footer.index_handle().size(), 0U);
5088
- ASSERT_EQ(decoded_footer.format_version(), 0U);
5089
- ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U);
5090
- // Ensure serialized with legacy magic
5091
- ASSERT_EQ(
5092
- DecodeFixed64(footer.GetSlice().data() + footer.GetSlice().size() - 8),
5093
- kLegacyPlainTableMagicNumber);
5148
+ Status s = decoded_footer.DecodeFrom(
5149
+ Slice(fake_footer.data(), fake_footer.size()), 0);
5150
+ ASSERT_TRUE(s.IsNotSupported()) << s.ToString();
5151
+ ASSERT_TRUE(s.ToString().find("nsupported legacy magic number") !=
5152
+ std::string::npos)
5153
+ << s.ToString();
5154
+ ASSERT_TRUE(s.ToString().find("full compaction") != std::string::npos)
5155
+ << s.ToString();
5094
5156
  }
5157
+
5158
+ // Test format_version=1 with new magic number should be rejected
5095
5159
  {
5096
- // xxhash plain table (not currently used)
5097
- FooterBuilder footer;
5098
- ASSERT_OK(footer.Build(kPlainTableMagicNumber, /* format_version */ 1,
5099
- footer_offset, kxxHash, meta_index));
5160
+ std::array<char, Footer::kNewVersionsEncodedLength> fake_footer;
5161
+ std::fill(fake_footer.begin(), fake_footer.end(), 0);
5162
+ // Part 1: checksum type
5163
+ fake_footer[0] = kCRC32c;
5164
+ // Part 3: format_version=1 and new magic number
5165
+ char* part3 = fake_footer.data() + fake_footer.size() - 12;
5166
+ EncodeFixed32(part3, 1); // format_version = 1
5167
+ EncodeFixed64(part3 + 4, kBlockBasedTableMagicNumber);
5168
+
5100
5169
  Footer decoded_footer;
5101
- ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset));
5102
- ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
5103
- ASSERT_EQ(decoded_footer.checksum_type(), kxxHash);
5104
- ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
5105
- ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
5106
- ASSERT_EQ(decoded_footer.index_handle().offset(), 0U);
5107
- ASSERT_EQ(decoded_footer.index_handle().size(), 0U);
5108
- ASSERT_EQ(decoded_footer.format_version(), 1U);
5109
- ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U);
5170
+ Status s = decoded_footer.DecodeFrom(
5171
+ Slice(fake_footer.data(), fake_footer.size()), 0);
5172
+ // format_version=1 is not supported for read, should return Corruption
5173
+ ASSERT_TRUE(s.IsCorruption()) << s.ToString();
5174
+ ASSERT_TRUE(s.ToString().find("format_version") != std::string::npos)
5175
+ << s.ToString();
5176
+ }
5177
+
5178
+ // Test format_version=0 with new magic number should be rejected
5179
+ {
5180
+ std::array<char, Footer::kNewVersionsEncodedLength> fake_footer;
5181
+ std::fill(fake_footer.begin(), fake_footer.end(), 0);
5182
+ // Part 1: checksum type
5183
+ fake_footer[0] = kCRC32c;
5184
+ // Part 3: format_version=0 and new magic number
5185
+ char* part3 = fake_footer.data() + fake_footer.size() - 12;
5186
+ EncodeFixed32(part3, 0); // format_version = 0
5187
+ EncodeFixed64(part3 + 4, kBlockBasedTableMagicNumber);
5188
+
5189
+ Footer decoded_footer;
5190
+ Status s = decoded_footer.DecodeFrom(
5191
+ Slice(fake_footer.data(), fake_footer.size()), 0);
5192
+ // format_version=0 is not supported for read, should return Corruption
5193
+ ASSERT_TRUE(s.IsCorruption()) << s.ToString();
5194
+ ASSERT_TRUE(s.ToString().find("format_version") != std::string::npos)
5195
+ << s.ToString();
5196
+ }
5197
+ }
5198
+
5199
+ // Test that configuring unsupported format_version for writing is sanitized
5200
+ // or rejected as appropriate
5201
+ TEST(TableTest, UnsupportedFormatVersionConfigTest) {
5202
+ // Temporarily disable unsupported format version allowance for this test
5203
+ bool& allow = TEST_AllowUnsupportedFormatVersion();
5204
+ SaveAndRestore<bool> saved_allow(&allow, false);
5205
+
5206
+ // Test that format_version < kMinSupportedBbtFormatVersionForWrite is
5207
+ // sanitized to kMinSupportedBbtFormatVersionForWrite during initialization
5208
+ for (uint32_t fv = 0; fv < kMinSupportedBbtFormatVersionForWrite; ++fv) {
5209
+ BlockBasedTableOptions table_options;
5210
+ table_options.format_version = fv;
5211
+ BlockBasedTableFactory factory(table_options);
5212
+
5213
+ // After construction, format_version should be sanitized
5214
+ auto* opts = factory.GetOptions<BlockBasedTableOptions>();
5215
+ ASSERT_EQ(opts->format_version, kMinSupportedBbtFormatVersionForWrite)
5216
+ << "format_version=" << fv << " should be sanitized to "
5217
+ << kMinSupportedBbtFormatVersionForWrite;
5218
+ }
5219
+
5220
+ // Test that supported format versions are not changed
5221
+ for (uint32_t fv = kMinSupportedBbtFormatVersionForWrite;
5222
+ IsSupportedFormatVersionForWrite(kBlockBasedTableMagicNumber, fv);
5223
+ ++fv) {
5224
+ BlockBasedTableOptions table_options;
5225
+ table_options.format_version = fv;
5226
+ BlockBasedTableFactory factory(table_options);
5227
+
5228
+ auto* opts = factory.GetOptions<BlockBasedTableOptions>();
5229
+ ASSERT_EQ(opts->format_version, fv)
5230
+ << "format_version=" << fv << " should not be changed";
5231
+
5232
+ ColumnFamilyOptions cf_opts;
5233
+ DBOptions db_opts;
5234
+ Status s = factory.ValidateOptions(db_opts, cf_opts);
5235
+ ASSERT_OK(s) << "format_version=" << fv << ": " << s.ToString();
5236
+ }
5237
+
5238
+ // Test that format_version > kLatestBbtFormatVersion is rejected by
5239
+ // ValidateOptions (not sanitized, since it could be a future version that
5240
+ // requires newer code)
5241
+ {
5242
+ BlockBasedTableOptions table_options;
5243
+ table_options.format_version = kLatestBbtFormatVersion + 1;
5244
+ BlockBasedTableFactory factory(table_options);
5245
+
5246
+ ColumnFamilyOptions cf_opts;
5247
+ DBOptions db_opts;
5248
+ Status s = factory.ValidateOptions(db_opts, cf_opts);
5249
+ ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
5110
5250
  }
5111
5251
  }
5112
5252
 
@@ -5211,10 +5351,6 @@ class TestPrefixExtractor : public ROCKSDB_NAMESPACE::SliceTransform {
5211
5351
  return IsValid(src);
5212
5352
  }
5213
5353
 
5214
- bool InRange(const ROCKSDB_NAMESPACE::Slice& /*dst*/) const override {
5215
- return true;
5216
- }
5217
-
5218
5354
  bool IsValid(const ROCKSDB_NAMESPACE::Slice& src) const {
5219
5355
  if (src.size() != 4) {
5220
5356
  return false;
@@ -5252,7 +5388,7 @@ TEST_F(PrefixTest, PrefixAndWholeKeyTest) {
5252
5388
  const std::string kDBPath = test::PerThreadDBPath("table_prefix_test");
5253
5389
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
5254
5390
  ASSERT_OK(DestroyDB(kDBPath, options));
5255
- ROCKSDB_NAMESPACE::DB* db;
5391
+ std::unique_ptr<ROCKSDB_NAMESPACE::DB> db;
5256
5392
  ASSERT_OK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db));
5257
5393
 
5258
5394
  // Create a bunch of keys with 10 filters.
@@ -5266,7 +5402,7 @@ TEST_F(PrefixTest, PrefixAndWholeKeyTest) {
5266
5402
 
5267
5403
  // Trigger compaction.
5268
5404
  ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr));
5269
- delete db;
5405
+ db.reset();
5270
5406
  // In the second round, turn whole_key_filtering off and expect
5271
5407
  // rocksdb still works.
5272
5408
  }
@@ -5572,7 +5708,7 @@ TEST_P(BlockBasedTableTest, FixBlockAlignMismatchedFileChecksums) {
5572
5708
  const std::string kDBPath =
5573
5709
  test::PerThreadDBPath("block_align_padded_bytes_verify_file_checksums");
5574
5710
  ASSERT_OK(DestroyDB(kDBPath, options));
5575
- DB* db;
5711
+ std::unique_ptr<DB> db;
5576
5712
  ASSERT_OK(DB::Open(options, kDBPath, &db));
5577
5713
  ASSERT_OK(db->Put(WriteOptions(), "k1", "v1"));
5578
5714
  ASSERT_OK(db->Flush(FlushOptions()));
@@ -5580,7 +5716,7 @@ TEST_P(BlockBasedTableTest, FixBlockAlignMismatchedFileChecksums) {
5580
5716
  // aligning blocks are used to generate the checksum to compare against the
5581
5717
  // one not generated by padded bytes
5582
5718
  ASSERT_OK(db->VerifyFileChecksums(ReadOptions()));
5583
- delete db;
5719
+ db.reset();
5584
5720
  }
5585
5721
 
5586
5722
  class NoBufferAlignmenttWritableFile : public FSWritableFileOwnerWrapper {
@@ -5635,7 +5771,7 @@ TEST_P(BlockBasedTableTest,
5635
5771
  const std::string kDBPath = test::PerThreadDBPath(
5636
5772
  "block_align_flush_during_flush_verify_file_checksums");
5637
5773
  ASSERT_OK(DestroyDB(kDBPath, options));
5638
- DB* db;
5774
+ std::unique_ptr<DB> db;
5639
5775
  ASSERT_OK(DB::Open(options, kDBPath, &db));
5640
5776
 
5641
5777
  ASSERT_OK(db->Put(WriteOptions(), "k1", "k2"));
@@ -5644,7 +5780,7 @@ TEST_P(BlockBasedTableTest,
5644
5780
  // Before the fix, VerifyFileChecksums() will fail as incorrect padded bytes
5645
5781
  // were used to generate checksum upon file creation
5646
5782
  ASSERT_OK(db->VerifyFileChecksums(ReadOptions()));
5647
- delete db;
5783
+ db.reset();
5648
5784
  }
5649
5785
 
5650
5786
  TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
@@ -5707,8 +5843,7 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
5707
5843
  read_options_for_helper.verify_checksums = false;
5708
5844
  PersistentCacheOptions cache_options;
5709
5845
 
5710
- auto mgr = GetBuiltinCompressionManager(
5711
- GetCompressFormatForVersion(footer.format_version()));
5846
+ auto mgr = GetBuiltinV2CompressionManager();
5712
5847
  BlockFetcher block_fetcher(file, nullptr /* prefetch_buffer */, footer,
5713
5848
  read_options_for_helper, handle, contents,
5714
5849
  ioptions, false /* decompress */,
@@ -5722,8 +5857,12 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
5722
5857
  auto metaindex_handle = footer.metaindex_handle();
5723
5858
  BlockContents metaindex_contents;
5724
5859
 
5860
+ get_perf_context()->Reset();
5725
5861
  BlockFetchHelper(metaindex_handle, BlockType::kMetaIndex,
5726
5862
  &metaindex_contents);
5863
+ ASSERT_GT(get_perf_context()->metadata_block_read_byte, 0);
5864
+ ASSERT_EQ(get_perf_context()->block_read_byte,
5865
+ get_perf_context()->metadata_block_read_byte);
5727
5866
  Block metaindex_block(std::move(metaindex_contents));
5728
5867
 
5729
5868
  std::unique_ptr<InternalIterator> meta_iter(metaindex_block.NewDataIterator(
@@ -5735,8 +5874,12 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
5735
5874
  &properties_handle));
5736
5875
  ASSERT_FALSE(properties_handle.IsNull());
5737
5876
  BlockContents properties_contents;
5877
+ get_perf_context()->Reset();
5738
5878
  BlockFetchHelper(properties_handle, BlockType::kProperties,
5739
5879
  &properties_contents);
5880
+ ASSERT_GT(get_perf_context()->metadata_block_read_byte, 0);
5881
+ ASSERT_EQ(get_perf_context()->block_read_byte,
5882
+ get_perf_context()->metadata_block_read_byte);
5740
5883
  Block properties_block(std::move(properties_contents));
5741
5884
 
5742
5885
  ASSERT_EQ(properties_block.NumRestarts(), 1u);
@@ -5846,8 +5989,7 @@ TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) {
5846
5989
  auto metaindex_handle = footer.metaindex_handle();
5847
5990
  BlockContents metaindex_contents;
5848
5991
  PersistentCacheOptions pcache_opts;
5849
- auto mgr = GetBuiltinCompressionManager(
5850
- GetCompressFormatForVersion(footer.format_version()));
5992
+ auto mgr = GetBuiltinV2CompressionManager();
5851
5993
  BlockFetcher block_fetcher(
5852
5994
  table_reader.get(), nullptr /* prefetch_buffer */, footer, ReadOptions(),
5853
5995
  metaindex_handle, &metaindex_contents, ioptions, false /* decompress */,
@@ -5929,8 +6071,7 @@ TEST_P(BlockBasedTableTest, SeekMetaBlocks) {
5929
6071
  auto metaindex_handle = footer.metaindex_handle();
5930
6072
  BlockContents metaindex_contents;
5931
6073
  PersistentCacheOptions pcache_opts;
5932
- auto mgr = GetBuiltinCompressionManager(
5933
- GetCompressFormatForVersion(footer.format_version()));
6074
+ auto mgr = GetBuiltinV2CompressionManager();
5934
6075
  BlockFetcher block_fetcher(
5935
6076
  table_reader.get(), nullptr /* prefetch_buffer */, footer, ReadOptions(),
5936
6077
  metaindex_handle, &metaindex_contents, ioptions, false /* decompress */,
@@ -5980,27 +6121,25 @@ TEST_P(BlockBasedTableTest, BadOptions) {
5980
6121
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
5981
6122
  ASSERT_OK(DestroyDB(kDBPath, options));
5982
6123
 
5983
- std::unique_ptr<DB> db;
5984
6124
  {
5985
- ROCKSDB_NAMESPACE::DB* _db;
5986
- ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &_db));
6125
+ std::unique_ptr<ROCKSDB_NAMESPACE::DB> db;
6126
+ ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db));
5987
6127
 
5988
6128
  bbto.block_size = 4096;
5989
6129
  options.compression = kSnappyCompression;
5990
6130
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
5991
- ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &_db));
6131
+ ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db));
5992
6132
 
5993
6133
  options.compression = kNoCompression;
5994
6134
  options.bottommost_compression = kSnappyCompression;
5995
- ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &_db));
6135
+ ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db));
5996
6136
 
5997
6137
  options.bottommost_compression = kNoCompression;
5998
6138
  options.compression_per_level.emplace_back(kSnappyCompression);
5999
- ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &_db));
6139
+ ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db));
6000
6140
 
6001
6141
  options.compression_per_level.clear();
6002
- ASSERT_OK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &_db));
6003
- db.reset(_db);
6142
+ ASSERT_OK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db));
6004
6143
  }
6005
6144
  }
6006
6145
 
@@ -6237,6 +6376,46 @@ TEST_P(BlockBasedTableTest, OutOfBoundOnNext) {
6237
6376
  ASSERT_FALSE(iter->UpperBoundCheckResult() == IterBoundCheck::kOutOfBound);
6238
6377
  }
6239
6378
 
6379
+ // Test that a single large entry with value larger than block size works
6380
+ TEST_P(BlockBasedTableTest, SingleLargeEntry) {
6381
+ TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
6382
+ Options options;
6383
+ BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
6384
+
6385
+ // Set a small block size
6386
+ constexpr size_t kBlockSize = 1024;
6387
+ table_options.block_size = kBlockSize;
6388
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
6389
+ options.compression = kNoCompression;
6390
+
6391
+ // Create a value that is larger than the block size
6392
+ const size_t kLargeValueSize = kBlockSize * 4;
6393
+ std::string large_value(kLargeValueSize, 'x');
6394
+ c.Add("key1", large_value);
6395
+
6396
+ std::vector<std::string> keys;
6397
+ stl_wrappers::KVMap kvmap;
6398
+ const ImmutableOptions ioptions(options);
6399
+ const MutableCFOptions moptions(options);
6400
+ c.Finish(options, ioptions, moptions, table_options,
6401
+ GetPlainInternalComparator(options.comparator), &keys, &kvmap);
6402
+
6403
+ auto* reader = c.GetTableReader();
6404
+ ReadOptions read_options;
6405
+ std::unique_ptr<InternalIterator> iter(reader->NewIterator(
6406
+ read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr,
6407
+ /*skip_filters=*/false, TableReaderCaller::kUncategorized));
6408
+
6409
+ iter->SeekToFirst();
6410
+ ASSERT_TRUE(iter->Valid());
6411
+ ASSERT_OK(iter->status());
6412
+ ASSERT_EQ(large_value, iter->value().ToString());
6413
+
6414
+ iter->Next();
6415
+ ASSERT_FALSE(iter->Valid());
6416
+ ASSERT_OK(iter->status());
6417
+ }
6418
+
6240
6419
  class ChargeCompressionDictionaryBuildingBufferTest
6241
6420
  : public BlockBasedTableTestBase {};
6242
6421
  TEST_F(ChargeCompressionDictionaryBuildingBufferTest, Basic) {
@@ -6866,10 +7045,10 @@ class ExternalTableTest : public DBTestBase {
6866
7045
 
6867
7046
  Status Get(const ReadOptions& /*read_options*/, const Slice& key,
6868
7047
  const SliceTransform* /*prefix_extractor*/,
6869
- std::string* value) override {
7048
+ PinnableSlice* value) override {
6870
7049
  auto iter = kv_map_.find(key.ToString());
6871
7050
  if (iter != kv_map_.end()) {
6872
- value->assign(iter->second);
7051
+ value->PinSelf(iter->second);
6873
7052
  return Status::OK();
6874
7053
  }
6875
7054
  return Status::NotFound();
@@ -6878,7 +7057,7 @@ class ExternalTableTest : public DBTestBase {
6878
7057
  void MultiGet(const ReadOptions& read_options,
6879
7058
  const std::vector<Slice>& keys,
6880
7059
  const SliceTransform* prefix_extractor,
6881
- std::vector<std::string>* values,
7060
+ std::vector<PinnableSlice>* values,
6882
7061
  std::vector<Status>* statuses) override {
6883
7062
  values->resize(keys.size());
6884
7063
  statuses->resize(keys.size());
@@ -6912,6 +7091,38 @@ class ExternalTableTest : public DBTestBase {
6912
7091
  bool support_property_block_;
6913
7092
  };
6914
7093
 
7094
+ // A reader that pins values from its internal buffer, exercising the
7095
+ // zero-copy path in ExternalTableReaderAdapter::Get().
7096
+ class PinnedDummyExternalTableReader : public DummyExternalTableReader {
7097
+ public:
7098
+ using DummyExternalTableReader::DummyExternalTableReader;
7099
+
7100
+ Status Get(const ReadOptions& /*read_options*/, const Slice& key,
7101
+ const SliceTransform* /*prefix_extractor*/,
7102
+ PinnableSlice* value) override {
7103
+ auto it = pinned_data_.find(key.ToString());
7104
+ if (it != pinned_data_.end()) {
7105
+ Slice s(it->second);
7106
+ value->PinSlice(s, &PinCleanup, &pin_cleanup_count_, nullptr);
7107
+ return Status::OK();
7108
+ }
7109
+ return Status::NotFound();
7110
+ }
7111
+
7112
+ void SetPinnedData(const std::map<std::string, std::string>& data) {
7113
+ pinned_data_ = data;
7114
+ }
7115
+
7116
+ int pin_cleanup_count() const { return pin_cleanup_count_; }
7117
+
7118
+ private:
7119
+ static void PinCleanup(void* arg1, void* /*arg2*/) {
7120
+ (*static_cast<int*>(arg1))++;
7121
+ }
7122
+ std::map<std::string, std::string> pinned_data_;
7123
+ int pin_cleanup_count_ = 0;
7124
+ };
7125
+
6915
7126
  class DummyExternalTableBuilder : public ExternalTableBuilder {
6916
7127
  public:
6917
7128
  explicit DummyExternalTableBuilder(const std::string& file_path,
@@ -6984,6 +7195,37 @@ class ExternalTableTest : public DBTestBase {
6984
7195
  private:
6985
7196
  bool support_property_block_;
6986
7197
  };
7198
+
7199
+ class PinnedDummyExternalTableFactory : public ExternalTableFactory {
7200
+ public:
7201
+ const char* Name() const override {
7202
+ return "PinnedDummyExternalTableFactory";
7203
+ }
7204
+
7205
+ Status NewTableReader(
7206
+ const ReadOptions& /*read_options*/, const std::string& file_path,
7207
+ const ExternalTableOptions& /*topts*/,
7208
+ std::unique_ptr<ExternalTableReader>* table_reader) const override {
7209
+ auto* reader =
7210
+ new PinnedDummyExternalTableReader(file_path,
7211
+ /*support_property_block=*/true);
7212
+ last_reader_ = reader;
7213
+ table_reader->reset(reader);
7214
+ return Status::OK();
7215
+ }
7216
+
7217
+ ExternalTableBuilder* NewTableBuilder(
7218
+ const ExternalTableBuilderOptions& /*opts*/,
7219
+ const std::string& file_path, FSWritableFile* file) const override {
7220
+ return new DummyExternalTableBuilder(file_path, file,
7221
+ /*support_property_block=*/true);
7222
+ }
7223
+
7224
+ PinnedDummyExternalTableReader* last_reader() const { return last_reader_; }
7225
+
7226
+ private:
7227
+ mutable PinnedDummyExternalTableReader* last_reader_ = nullptr;
7228
+ };
6987
7229
  };
6988
7230
 
6989
7231
  TEST_F(ExternalTableTest, BasicTest) {
@@ -7021,11 +7263,11 @@ TEST_F(ExternalTableTest, BasicTest) {
7021
7263
  iter->Next();
7022
7264
  ASSERT_FALSE(iter->Valid());
7023
7265
 
7024
- std::string val;
7266
+ PinnableSlice val;
7025
7267
  ASSERT_OK(reader->Get({}, "foo", nullptr, &val));
7026
7268
  ASSERT_EQ(val, "bar");
7027
7269
 
7028
- std::vector<std::string> vals;
7270
+ std::vector<PinnableSlice> vals;
7029
7271
  std::vector<Status> statuses;
7030
7272
  reader->MultiGet({}, {"foo", "bar"}, nullptr, &vals, &statuses);
7031
7273
  ASSERT_EQ(vals.size(), 2);
@@ -7053,22 +7295,169 @@ TEST_F(ExternalTableTest, SstReaderTest) {
7053
7295
  std::unique_ptr<SstFileWriter> writer;
7054
7296
  writer.reset(new SstFileWriter(EnvOptions(), options));
7055
7297
  ASSERT_OK(writer->Open(ingest_file));
7056
- ASSERT_OK(writer->Put("foo", "bar"));
7298
+ ASSERT_OK(writer->Put("a", "val_a"));
7299
+ ASSERT_OK(writer->Put("b", "val_b"));
7300
+ ASSERT_OK(writer->Put("c", "val_c"));
7057
7301
  ASSERT_OK(writer->Finish());
7058
7302
  writer.reset();
7059
7303
 
7060
7304
  std::unique_ptr<SstFileReader> reader(new SstFileReader(options));
7061
7305
  ASSERT_OK(reader->Open(ingest_file));
7062
7306
 
7307
+ // Test iterator
7063
7308
  ReadOptions ro;
7064
7309
  std::unique_ptr<Iterator> iter(reader->NewIterator(ro));
7065
7310
  ASSERT_NE(iter, nullptr);
7066
- iter->Seek("foo");
7311
+ iter->Seek("a");
7067
7312
  ASSERT_TRUE(iter->Valid() && iter->status().ok());
7068
- ASSERT_EQ(iter->value(), "bar");
7313
+ ASSERT_EQ(iter->value(), "val_a");
7314
+ iter->Next();
7315
+ ASSERT_TRUE(iter->Valid());
7316
+ ASSERT_EQ(iter->value(), "val_b");
7317
+ iter->Next();
7318
+ ASSERT_TRUE(iter->Valid());
7319
+ ASSERT_EQ(iter->value(), "val_c");
7069
7320
  iter->Next();
7070
7321
  ASSERT_FALSE(iter->Valid());
7071
7322
  ASSERT_TRUE(iter->status().ok());
7323
+
7324
+ // Test MultiGet
7325
+ std::vector<Slice> keys = {"a", "b", "missing", "c"};
7326
+ std::vector<std::string> values;
7327
+ std::vector<Status> statuses = reader->MultiGet(ReadOptions(), keys, &values);
7328
+ ASSERT_EQ(values.size(), keys.size());
7329
+ ASSERT_EQ(statuses.size(), keys.size());
7330
+ ASSERT_OK(statuses[0]);
7331
+ ASSERT_EQ(values[0], "val_a");
7332
+ ASSERT_OK(statuses[1]);
7333
+ ASSERT_EQ(values[1], "val_b");
7334
+ ASSERT_TRUE(statuses[2].IsNotFound());
7335
+ ASSERT_OK(statuses[3]);
7336
+ ASSERT_EQ(values[3], "val_c");
7337
+ }
7338
+
7339
+ TEST_F(ExternalTableTest, PinnedGetTest) {
7340
+ if (encrypted_env_) {
7341
+ ROCKSDB_GTEST_SKIP("Test requires non-encrypted environment");
7342
+ return;
7343
+ }
7344
+ Options options = GetDefaultOptions();
7345
+ auto factory = std::make_shared<PinnedDummyExternalTableFactory>();
7346
+ options.table_factory = NewExternalTableFactory(factory);
7347
+ Reopen(options);
7348
+
7349
+ std::string ingest_file = dbname_ + "/test.immutabledb";
7350
+
7351
+ std::unique_ptr<SstFileWriter> writer;
7352
+ writer.reset(new SstFileWriter(EnvOptions(), options));
7353
+ ASSERT_OK(writer->Open(ingest_file));
7354
+ ASSERT_OK(writer->Put("key1", "val1"));
7355
+ ASSERT_OK(writer->Put("key2", "val2"));
7356
+ ASSERT_OK(writer->Finish());
7357
+ writer.reset();
7358
+
7359
+ IngestExternalFileOptions ifo;
7360
+ ASSERT_OK(db_->IngestExternalFile({ingest_file}, ifo));
7361
+ ASSERT_NE(factory->last_reader(), nullptr);
7362
+
7363
+ factory->last_reader()->SetPinnedData(
7364
+ {{"key1", "pinned_val1"}, {"key2", "pinned_val2"}});
7365
+
7366
+ PinnableSlice pinnable;
7367
+ ASSERT_OK(
7368
+ db_->Get(ReadOptions(), db_->DefaultColumnFamily(), "key1", &pinnable));
7369
+ ASSERT_EQ(pinnable.ToString(), "pinned_val1");
7370
+ ASSERT_TRUE(pinnable.IsPinned());
7371
+ pinnable.Reset();
7372
+
7373
+ ASSERT_OK(
7374
+ db_->Get(ReadOptions(), db_->DefaultColumnFamily(), "key2", &pinnable));
7375
+ ASSERT_EQ(pinnable.ToString(), "pinned_val2");
7376
+ ASSERT_TRUE(pinnable.IsPinned());
7377
+ pinnable.Reset();
7378
+
7379
+ // Verify cleanup ran for both Gets
7380
+ ASSERT_EQ(factory->last_reader()->pin_cleanup_count(), 2);
7381
+
7382
+ // Verify NotFound still works
7383
+ Status s =
7384
+ db_->Get(ReadOptions(), db_->DefaultColumnFamily(), "missing", &pinnable);
7385
+ ASSERT_TRUE(s.IsNotFound());
7386
+
7387
+ // Test MultiGet with PinnableSlice to exercise the batched pin path
7388
+ const size_t num_keys = 3;
7389
+ std::array<Slice, num_keys> mg_keys = {Slice("key1"), Slice("missing"),
7390
+ Slice("key2")};
7391
+ std::array<PinnableSlice, num_keys> mg_values;
7392
+ std::array<Status, num_keys> mg_statuses;
7393
+ db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys,
7394
+ mg_keys.data(), mg_values.data(), mg_statuses.data());
7395
+
7396
+ ASSERT_OK(mg_statuses[0]);
7397
+ ASSERT_EQ(mg_values[0].ToString(), "pinned_val1");
7398
+ ASSERT_TRUE(mg_values[0].IsPinned());
7399
+
7400
+ ASSERT_TRUE(mg_statuses[1].IsNotFound());
7401
+
7402
+ ASSERT_OK(mg_statuses[2]);
7403
+ ASSERT_EQ(mg_values[2].ToString(), "pinned_val2");
7404
+ ASSERT_TRUE(mg_values[2].IsPinned());
7405
+
7406
+ // Reset PinnableSlices to trigger cleanups
7407
+ for (auto& v : mg_values) {
7408
+ v.Reset();
7409
+ }
7410
+ ASSERT_EQ(factory->last_reader()->pin_cleanup_count(), 4);
7411
+ }
7412
+
7413
+ TEST_F(ExternalTableTest, SstReaderPinnableMultiGetTest) {
7414
+ if (encrypted_env_) {
7415
+ ROCKSDB_GTEST_SKIP("Test requires non-encrypted environment");
7416
+ return;
7417
+ }
7418
+ Options options = GetDefaultOptions();
7419
+ std::string dbname =
7420
+ test::PerThreadDBPath("sst_reader_pinnable_multiget_test");
7421
+ std::string sst_file = dbname + "/test.sst";
7422
+ ASSERT_OK(options.env->CreateDirIfMissing(dbname));
7423
+
7424
+ std::unique_ptr<SstFileWriter> writer(
7425
+ new SstFileWriter(EnvOptions(), options));
7426
+ ASSERT_OK(writer->Open(sst_file));
7427
+ ASSERT_OK(writer->Put("a", "val_a"));
7428
+ ASSERT_OK(writer->Put("b", "val_b"));
7429
+ ASSERT_OK(writer->Put("c", "val_c"));
7430
+ ASSERT_OK(writer->Finish());
7431
+ writer.reset();
7432
+
7433
+ std::unique_ptr<SstFileReader> reader(new SstFileReader(options));
7434
+ ASSERT_OK(reader->Open(sst_file));
7435
+
7436
+ // Test PinnableSlice MultiGet
7437
+ std::vector<Slice> keys = {"a", "b", "missing", "c"};
7438
+ std::vector<PinnableSlice> values;
7439
+ std::vector<Status> statuses = reader->MultiGet(ReadOptions(), keys, &values);
7440
+ ASSERT_EQ(values.size(), keys.size());
7441
+ ASSERT_EQ(statuses.size(), keys.size());
7442
+ ASSERT_OK(statuses[0]);
7443
+ ASSERT_EQ(values[0].ToString(), "val_a");
7444
+ ASSERT_OK(statuses[1]);
7445
+ ASSERT_EQ(values[1].ToString(), "val_b");
7446
+ ASSERT_TRUE(statuses[2].IsNotFound());
7447
+ ASSERT_OK(statuses[3]);
7448
+ ASSERT_EQ(values[3].ToString(), "val_c");
7449
+
7450
+ // Verify std::string MultiGet wrapper still works
7451
+ std::vector<std::string> str_values;
7452
+ statuses = reader->MultiGet(ReadOptions(), keys, &str_values);
7453
+ ASSERT_EQ(str_values.size(), keys.size());
7454
+ ASSERT_OK(statuses[0]);
7455
+ ASSERT_EQ(str_values[0], "val_a");
7456
+ ASSERT_OK(statuses[1]);
7457
+ ASSERT_EQ(str_values[1], "val_b");
7458
+ ASSERT_TRUE(statuses[2].IsNotFound());
7459
+ ASSERT_OK(statuses[3]);
7460
+ ASSERT_EQ(str_values[3], "val_c");
7072
7461
  }
7073
7462
 
7074
7463
  TEST_F(ExternalTableTest, ExternalFileChecksumTest) {
@@ -7467,10 +7856,23 @@ class UserDefinedIndexTestBase : public BlockBasedTableTestBase {
7467
7856
  Status NewBuilder(
7468
7857
  const UserDefinedIndexOption& /*option*/,
7469
7858
  std::unique_ptr<UserDefinedIndexBuilder>& builder) const override {
7470
- builder = std::make_unique<TestUserDefinedIndexBuilder>();
7859
+ auto b = std::make_unique<TestUserDefinedIndexBuilder>();
7860
+ b->skip_key_size_check_ = skip_key_size_check_;
7861
+ // Share the factory's key_type_log so tests can inspect after flush.
7862
+ b->shared_key_type_log_ = &key_type_log_;
7863
+ builder = std::move(b);
7471
7864
  return Status::OK();
7472
7865
  }
7473
7866
 
7867
+ // When true, builders skip key-size assertions (for variable-length keys).
7868
+ bool skip_key_size_check_ = false;
7869
+
7870
+ // Accumulated log of (key, ValueType) pairs from all builders created
7871
+ // by this factory. Tests can inspect this after flush/compaction.
7872
+ mutable std::vector<
7873
+ std::pair<std::string, UserDefinedIndexBuilder::ValueType>>
7874
+ key_type_log_;
7875
+
7474
7876
  struct CustomizedMapComparator {
7475
7877
  CustomizedMapComparator(const Comparator* _comparator)
7476
7878
  : comparator(_comparator) {}
@@ -7507,13 +7909,16 @@ class UserDefinedIndexTestBase : public BlockBasedTableTestBase {
7507
7909
  Slice AddIndexEntry(const Slice& last_key_in_current_block,
7508
7910
  const Slice* first_key_in_next_block,
7509
7911
  const BlockHandle& block_handle,
7510
- std::string* separator_scratch) override {
7912
+ std::string* separator_scratch,
7913
+ const IndexEntryContext& /*context*/) override {
7511
7914
  if (keys_added_ == 0) {
7512
7915
  return last_key_in_current_block;
7513
7916
  }
7514
- EXPECT_EQ(last_key_in_current_block.size(), 5);
7515
- if (first_key_in_next_block) {
7516
- EXPECT_EQ(first_key_in_next_block->size(), 5);
7917
+ if (!skip_key_size_check_) {
7918
+ EXPECT_EQ(last_key_in_current_block.size(), 5);
7919
+ if (first_key_in_next_block) {
7920
+ EXPECT_EQ(first_key_in_next_block->size(), 5);
7921
+ }
7517
7922
  }
7518
7923
  // Unused parameters
7519
7924
  (void)separator_scratch;
@@ -7530,18 +7935,27 @@ class UserDefinedIndexTestBase : public BlockBasedTableTestBase {
7530
7935
  return last_key_in_current_block;
7531
7936
  }
7532
7937
 
7533
- void OnKeyAdded(const Slice& key, ValueType /*value*/,
7938
+ void OnKeyAdded(const Slice& key, ValueType type,
7534
7939
  const Slice& /*value*/) override {
7535
7940
  if (key.starts_with("dummy")) {
7536
7941
  return;
7537
7942
  }
7538
- EXPECT_EQ(key.size(), 5);
7539
- // Track keys added to the index
7943
+ if (!skip_key_size_check_) {
7944
+ EXPECT_EQ(key.size(), 5);
7945
+ }
7946
+ // Record the ValueType for each key so tests can verify the mapping.
7947
+ if (shared_key_type_log_) {
7948
+ shared_key_type_log_->emplace_back(key.ToString(), type);
7949
+ }
7950
+ // Track keys added to the current block (used by AddIndexEntry).
7540
7951
  keys_added_++;
7541
- // Add dummy entry
7542
- PutFixed64(&index_data_[key.ToString()], 0);
7543
- PutFixed64(&index_data_[key.ToString()], 0);
7544
- PutFixed32(&index_data_[key.ToString()], 0);
7952
+ if (!skip_key_size_check_) {
7953
+ // For fixed-size key tests, add a dummy per-key entry that the
7954
+ // TestUserDefinedIndexReader can parse alongside block-level entries.
7955
+ PutFixed64(&index_data_[key.ToString()], 0);
7956
+ PutFixed64(&index_data_[key.ToString()], 0);
7957
+ PutFixed32(&index_data_[key.ToString()], 0);
7958
+ }
7545
7959
  }
7546
7960
 
7547
7961
  Status Finish(Slice* index_contents) override {
@@ -7562,6 +7976,14 @@ class UserDefinedIndexTestBase : public BlockBasedTableTestBase {
7562
7976
 
7563
7977
  int GetEntriesAdded() const { return entries_added_; }
7564
7978
 
7979
+ // When true, skip the EXPECT_EQ(key.size(), 5) checks, allowing
7980
+ // variable-length keys (e.g., from DB flush/compaction).
7981
+ bool skip_key_size_check_ = false;
7982
+
7983
+ // Points to the factory's shared log vector. Set by the factory.
7984
+ mutable std::vector<std::pair<std::string, ValueType>>*
7985
+ shared_key_type_log_ = nullptr;
7986
+
7565
7987
  private:
7566
7988
  int entries_added_;
7567
7989
  std::map<std::string, std::string> index_data_;
@@ -7623,8 +8045,8 @@ class UserDefinedIndexTestBase : public BlockBasedTableTestBase {
7623
8045
  next_error_count_(factory->next_error_count_),
7624
8046
  comparator_(comparator) {}
7625
8047
 
7626
- Status SeekAndGetResult(const Slice& key,
7627
- IterateResult* result) override {
8048
+ Status SeekAndGetResult(const Slice& key, IterateResult* result,
8049
+ const SeekContext& /*context*/) override {
7628
8050
  Status s;
7629
8051
  if (seek_error_count_) {
7630
8052
  seek_error_count_--;
@@ -7648,11 +8070,16 @@ class UserDefinedIndexTestBase : public BlockBasedTableTestBase {
7648
8070
  iter_ = index_.lower_bound(key.ToString());
7649
8071
  if ((iter_ != index_.end()) && IsInbound()) {
7650
8072
  AdvanceToNextIndexEntry();
7651
- result->bound_check_result = IterBoundCheck::kInbound;
7652
- result->key = Slice(iter_->first);
7653
- if (scan_opts_ && target_num_keys_ > 0 &&
7654
- comparator_->Compare(key, iter_->first) == 0) {
7655
- target_num_keys_--;
8073
+ if (iter_ != index_.end()) {
8074
+ result->bound_check_result = IterBoundCheck::kInbound;
8075
+ result->key = Slice(iter_->first);
8076
+ if (scan_opts_ && target_num_keys_ > 0 &&
8077
+ comparator_->Compare(key, iter_->first) == 0) {
8078
+ target_num_keys_--;
8079
+ }
8080
+ } else {
8081
+ result->bound_check_result = IterBoundCheck::kUnknown;
8082
+ result->key = Slice();
7656
8083
  }
7657
8084
  } else {
7658
8085
  result->bound_check_result = IterBoundCheck::kOutOfBound;
@@ -7687,10 +8114,16 @@ class UserDefinedIndexTestBase : public BlockBasedTableTestBase {
7687
8114
  iter_++;
7688
8115
  if ((iter_ != index_.end()) && IsInbound()) {
7689
8116
  AdvanceToNextIndexEntry();
7690
- result->bound_check_result = IterBoundCheck::kInbound;
7691
- result->key = Slice(iter_->first);
7692
- target_num_keys_ -=
7693
- std::min(target_num_keys_, iter_->second.second);
8117
+ if (iter_ != index_.end()) {
8118
+ result->bound_check_result = IterBoundCheck::kInbound;
8119
+ result->key = Slice(iter_->first);
8120
+ target_num_keys_ -=
8121
+ std::min(target_num_keys_, iter_->second.second);
8122
+ } else {
8123
+ // AdvanceToNextIndexEntry reached end of map.
8124
+ result->bound_check_result = IterBoundCheck::kUnknown;
8125
+ result->key = Slice();
8126
+ }
7694
8127
  } else {
7695
8128
  // EOF
7696
8129
  result->bound_check_result = IterBoundCheck::kUnknown;
@@ -7700,7 +8133,7 @@ class UserDefinedIndexTestBase : public BlockBasedTableTestBase {
7700
8133
  }
7701
8134
 
7702
8135
  void AdvanceToNextIndexEntry() {
7703
- while (iter_->second.second == 0) {
8136
+ while (iter_ != index_.end() && iter_->second.second == 0) {
7704
8137
  iter_++;
7705
8138
  }
7706
8139
  }
@@ -7920,7 +8353,8 @@ void UserDefinedIndexTestBase::BasicTest(bool use_partitioned_index) {
7920
8353
  /* unique_id */ {}, /* largest_seqno */ 0,
7921
8354
  /* tail_size */ 0, ioptions.persist_user_defined_timestamps);
7922
8355
  // Verify that the user-defined index was created
7923
- std::string meta_block_name = kUserDefinedIndexPrefix + "test_index";
8356
+ std::string meta_block_name =
8357
+ std::string(kUserDefinedIndexPrefix) + "test_index";
7924
8358
  BlockHandle block_handle;
7925
8359
  uint64_t file_size = 0;
7926
8360
  std::unique_ptr<FSRandomAccessFile> file;
@@ -8069,31 +8503,273 @@ TEST_P(UserDefinedIndexTest, InvalidArgumentTest1) {
8069
8503
  writer.reset();
8070
8504
  }
8071
8505
 
8072
- TEST_P(UserDefinedIndexTest, InvalidArgumentTest2) {
8506
+ TEST_P(UserDefinedIndexTest, MergeWithUDI) {
8507
+ // Verify that Merge operations work correctly with user-defined index.
8073
8508
  BlockBasedTableOptions table_options;
8074
8509
  std::string dbname = test::PerThreadDBPath("user_defined_index_test");
8075
8510
  std::string ingest_file = dbname + "test.sst";
8076
8511
 
8077
- // Set up the user-defined index factory
8078
8512
  auto user_defined_index_factory =
8079
8513
  std::make_shared<TestUserDefinedIndexFactory>();
8080
8514
  table_options.user_defined_index_factory = user_defined_index_factory;
8081
-
8082
- // Set up custom flush block policy that flushes every 3 keys
8083
8515
  table_options.flush_block_policy_factory =
8084
8516
  std::make_shared<CustomFlushBlockPolicyFactory>();
8085
8517
 
8086
8518
  options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
8519
+ options_.merge_operator = MergeOperators::CreateStringAppendOperator();
8087
8520
 
8088
8521
  std::unique_ptr<SstFileWriter> writer;
8089
8522
  writer.reset(new SstFileWriter(EnvOptions(), options_));
8090
8523
  ASSERT_OK(writer->Open(ingest_file));
8091
8524
 
8092
- std::string key = "foo";
8093
- std::string value = "bar";
8094
- ASSERT_OK(writer->Merge(key, value));
8095
- ASSERT_EQ(writer->Finish(), Status::InvalidArgument());
8525
+ // Use 5-byte keys to match TestUserDefinedIndexBuilder expectations.
8526
+ ASSERT_OK(writer->Merge("key_a", "val_a"));
8527
+ ASSERT_OK(writer->Finish());
8096
8528
  writer.reset();
8529
+
8530
+ // Read back and verify the merge entry is present in the SST.
8531
+ SstFileReader reader(options_);
8532
+ ASSERT_OK(reader.Open(ingest_file));
8533
+ ReadOptions ro;
8534
+ std::unique_ptr<Iterator> iter(reader.NewIterator(ro));
8535
+ iter->SeekToFirst();
8536
+ ASSERT_TRUE(iter->Valid());
8537
+ ASSERT_EQ(iter->key().ToString(), "key_a");
8538
+ ASSERT_EQ(iter->value().ToString(), "val_a");
8539
+ iter->Next();
8540
+ ASSERT_FALSE(iter->Valid());
8541
+ ASSERT_OK(iter->status());
8542
+ }
8543
+
8544
+ TEST_P(UserDefinedIndexTest, DBFlushWithMixedOpsAndUDI) {
8545
+ // Verify that Put, Delete, Merge, and SingleDelete all flow correctly
8546
+ // through the UDI builder when flushed via DB::Flush.
8547
+ std::string dbname = test::PerThreadDBPath("udi_db_flush_test");
8548
+ ASSERT_OK(DestroyDB(dbname, options_));
8549
+
8550
+ BlockBasedTableOptions table_options;
8551
+ auto user_defined_index_factory =
8552
+ std::make_shared<TestUserDefinedIndexFactory>();
8553
+ user_defined_index_factory->skip_key_size_check_ = true;
8554
+ table_options.user_defined_index_factory = user_defined_index_factory;
8555
+ table_options.flush_block_policy_factory =
8556
+ std::make_shared<CustomFlushBlockPolicyFactory>();
8557
+ options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
8558
+ options_.merge_operator = MergeOperators::CreateStringAppendOperator();
8559
+ options_.create_if_missing = true;
8560
+
8561
+ std::unique_ptr<DB> db;
8562
+ ASSERT_OK(DB::Open(options_, dbname, &db));
8563
+
8564
+ // Write mixed operations.
8565
+ ASSERT_OK(db->Put(WriteOptions(), "key_aa", "val_put"));
8566
+ ASSERT_OK(db->Merge(WriteOptions(), "key_bb", "val_merge"));
8567
+ ASSERT_OK(db->Delete(WriteOptions(), "key_cc"));
8568
+ ASSERT_OK(db->Put(WriteOptions(), "key_dd", "val_put2"));
8569
+ ASSERT_OK(db->SingleDelete(WriteOptions(), "key_dd"));
8570
+ ASSERT_OK(db->Put(WriteOptions(), "key_ee", "val_put3"));
8571
+
8572
+ // Flush to produce an SST with UDI.
8573
+ ASSERT_OK(db->Flush(FlushOptions()));
8574
+
8575
+ // Verify data is readable via the native index (which always works with
8576
+ // SeekToFirst). key_aa (put), key_bb (merge), key_ee (put) should be
8577
+ // visible. key_cc was deleted, key_dd was single-deleted.
8578
+ {
8579
+ ReadOptions ro;
8580
+ std::unique_ptr<Iterator> iter(db->NewIterator(ro));
8581
+ iter->SeekToFirst();
8582
+ std::vector<std::string> visible;
8583
+ for (; iter->Valid(); iter->Next()) {
8584
+ visible.push_back(iter->key().ToString());
8585
+ }
8586
+ ASSERT_OK(iter->status());
8587
+ ASSERT_EQ(visible.size(), 3u);
8588
+ // With reverse comparator, keys are in reverse order.
8589
+ if (is_reverse_comparator_) {
8590
+ std::vector<std::string> expected = {"key_ee", "key_bb", "key_aa"};
8591
+ ASSERT_EQ(visible, expected);
8592
+ } else {
8593
+ std::vector<std::string> expected = {"key_aa", "key_bb", "key_ee"};
8594
+ ASSERT_EQ(visible, expected);
8595
+ }
8596
+ }
8597
+
8598
+ ASSERT_OK(db->Close());
8599
+ ASSERT_OK(DestroyDB(dbname, options_));
8600
+ }
8601
+
8602
+ TEST_P(UserDefinedIndexTest, ValueTypeMappingViaDBFlush) {
8603
+ // Verify that MapToUDIValueType correctly maps internal ValueTypes to UDI
8604
+ // ValueTypes by writing various operation types via the DB API, flushing,
8605
+ // and inspecting what the TestUserDefinedIndexBuilder received.
8606
+ if (is_reverse_comparator_) {
8607
+ // Skip for reverse comparator — the key ordering makes this test
8608
+ // unnecessarily complex and the mapping logic is comparator-independent.
8609
+ ROCKSDB_GTEST_SKIP("Skipped for reverse comparator");
8610
+ return;
8611
+ }
8612
+ std::string dbname = test::PerThreadDBPath("udi_valuetype_mapping_test");
8613
+ ASSERT_OK(DestroyDB(dbname, options_));
8614
+
8615
+ BlockBasedTableOptions table_options;
8616
+ auto user_defined_index_factory =
8617
+ std::make_shared<TestUserDefinedIndexFactory>();
8618
+ user_defined_index_factory->skip_key_size_check_ = true;
8619
+ table_options.user_defined_index_factory = user_defined_index_factory;
8620
+ options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
8621
+ options_.merge_operator = MergeOperators::CreateStringAppendOperator();
8622
+ options_.create_if_missing = true;
8623
+ options_.disable_auto_compactions = true;
8624
+
8625
+ std::unique_ptr<DB> db;
8626
+ ASSERT_OK(DB::Open(options_, dbname, &db));
8627
+
8628
+ // Write one entry of each type that goes through the flush path.
8629
+ // kTypeValue:
8630
+ ASSERT_OK(db->Put(WriteOptions(), "key_01_put", "v1"));
8631
+ // kTypeMerge:
8632
+ ASSERT_OK(db->Merge(WriteOptions(), "key_02_merge", "m1"));
8633
+ // kTypeDeletion:
8634
+ ASSERT_OK(db->Delete(WriteOptions(), "key_03_del"));
8635
+ // kTypeSingleDeletion:
8636
+ ASSERT_OK(db->SingleDelete(WriteOptions(), "key_04_sdel"));
8637
+ // kTypeWideColumnEntity:
8638
+ ASSERT_OK(db->PutEntity(WriteOptions(), db->DefaultColumnFamily(),
8639
+ "key_05_entity", WideColumns{{"col1", "val1"}}));
8640
+
8641
+ ASSERT_OK(db->Flush(FlushOptions()));
8642
+
8643
+ // The builder recorded all (key, ValueType) pairs via the shared log.
8644
+ const auto& log = user_defined_index_factory->key_type_log_;
8645
+ ASSERT_FALSE(log.empty());
8646
+
8647
+ // Build a map from key to the ValueType received by OnKeyAdded.
8648
+ std::map<std::string, UserDefinedIndexBuilder::ValueType> type_map;
8649
+ for (const auto& entry : log) {
8650
+ type_map[entry.first] = entry.second;
8651
+ }
8652
+
8653
+ // Verify each mapping.
8654
+ ASSERT_EQ(type_map.count("key_01_put"), 1u);
8655
+ EXPECT_EQ(type_map["key_01_put"], UserDefinedIndexBuilder::kValue);
8656
+
8657
+ ASSERT_EQ(type_map.count("key_02_merge"), 1u);
8658
+ EXPECT_EQ(type_map["key_02_merge"], UserDefinedIndexBuilder::kMerge);
8659
+
8660
+ ASSERT_EQ(type_map.count("key_03_del"), 1u);
8661
+ EXPECT_EQ(type_map["key_03_del"], UserDefinedIndexBuilder::kDelete);
8662
+
8663
+ ASSERT_EQ(type_map.count("key_04_sdel"), 1u);
8664
+ EXPECT_EQ(type_map["key_04_sdel"], UserDefinedIndexBuilder::kDelete);
8665
+
8666
+ ASSERT_EQ(type_map.count("key_05_entity"), 1u);
8667
+ EXPECT_EQ(type_map["key_05_entity"], UserDefinedIndexBuilder::kOther);
8668
+
8669
+ ASSERT_OK(db->Close());
8670
+ ASSERT_OK(DestroyDB(dbname, options_));
8671
+ }
8672
+
8673
+ TEST_P(UserDefinedIndexTest, CompactionWithSnapshotsAndUDI) {
8674
+ // Verify that compaction with snapshots (producing multiple versions of the
8675
+ // same user key) works correctly with UDI.
8676
+ if (is_reverse_comparator_) {
8677
+ ROCKSDB_GTEST_SKIP("Skipped for reverse comparator");
8678
+ return;
8679
+ }
8680
+ std::string dbname = test::PerThreadDBPath("udi_compaction_snapshot_test");
8681
+ ASSERT_OK(DestroyDB(dbname, options_));
8682
+
8683
+ BlockBasedTableOptions table_options;
8684
+ auto user_defined_index_factory =
8685
+ std::make_shared<TestUserDefinedIndexFactory>();
8686
+ user_defined_index_factory->skip_key_size_check_ = true;
8687
+ table_options.user_defined_index_factory = user_defined_index_factory;
8688
+ options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
8689
+ options_.create_if_missing = true;
8690
+ // Disable auto-compaction so we control when compaction runs.
8691
+ options_.disable_auto_compactions = true;
8692
+
8693
+ std::unique_ptr<DB> db;
8694
+ ASSERT_OK(DB::Open(options_, dbname, &db));
8695
+
8696
+ // Write version 1 and flush.
8697
+ ASSERT_OK(db->Put(WriteOptions(), "key_aa", "v1"));
8698
+ ASSERT_OK(db->Put(WriteOptions(), "key_bb", "v1"));
8699
+ ASSERT_OK(db->Flush(FlushOptions()));
8700
+
8701
+ // Take a snapshot to force compaction to keep both versions.
8702
+ const Snapshot* snap = db->GetSnapshot();
8703
+
8704
+ // Write version 2 and flush (creates a second L0 file).
8705
+ ASSERT_OK(db->Put(WriteOptions(), "key_aa", "v2"));
8706
+ ASSERT_OK(db->Delete(WriteOptions(), "key_bb"));
8707
+ ASSERT_OK(db->Flush(FlushOptions()));
8708
+
8709
+ // Compact L0 → L1. With the snapshot held, both versions of key_aa
8710
+ // and the delete tombstone for key_bb must be preserved in the compaction
8711
+ // output. The UDI builder receives multiple entries for key_aa.
8712
+ ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr));
8713
+
8714
+ // Verify the UDI builder saw entries during compaction. The key_type_log
8715
+ // accumulates from all builders (two flushes + one compaction). The
8716
+ // compaction output must contain multiple versions of key_aa (v2 and v1,
8717
+ // due to the snapshot) and both the delete tombstone and old value of key_bb.
8718
+ const auto& log = user_defined_index_factory->key_type_log_;
8719
+ ASSERT_FALSE(log.empty());
8720
+
8721
+ // Count total occurrences of key_aa across all builders — at least 4:
8722
+ // flush1 (v1) + flush2 (v2) + compaction (v2, v1).
8723
+ int key_aa_count = 0;
8724
+ int key_bb_count = 0;
8725
+ for (const auto& entry : log) {
8726
+ if (entry.first == "key_aa") {
8727
+ key_aa_count++;
8728
+ } else if (entry.first == "key_bb") {
8729
+ key_bb_count++;
8730
+ }
8731
+ }
8732
+ // flush1 (1) + flush2 (1) + compaction (2 versions due to snapshot) = 4.
8733
+ ASSERT_GE(key_aa_count, 4) << "Expected key_aa from flush1 + flush2 + "
8734
+ "compaction (2 versions due to snapshot)";
8735
+ // flush1 (1) + flush2 (1) + compaction (tombstone + old value) = 4.
8736
+ ASSERT_GE(key_bb_count, 4) << "Expected key_bb from flush1 + flush2 + "
8737
+ "compaction (tombstone + old value)";
8738
+
8739
+ // Verify current view via native index: key_aa=v2, key_bb deleted.
8740
+ {
8741
+ ReadOptions ro;
8742
+ std::unique_ptr<Iterator> iter(db->NewIterator(ro));
8743
+ iter->SeekToFirst();
8744
+ ASSERT_TRUE(iter->Valid());
8745
+ ASSERT_EQ(iter->key().ToString(), "key_aa");
8746
+ ASSERT_EQ(iter->value().ToString(), "v2");
8747
+ iter->Next();
8748
+ ASSERT_FALSE(iter->Valid());
8749
+ ASSERT_OK(iter->status());
8750
+ }
8751
+
8752
+ // Verify snapshot view via native index: key_aa=v1, key_bb=v1.
8753
+ {
8754
+ ReadOptions ro;
8755
+ ro.snapshot = snap;
8756
+ std::unique_ptr<Iterator> iter(db->NewIterator(ro));
8757
+ iter->SeekToFirst();
8758
+ ASSERT_TRUE(iter->Valid());
8759
+ ASSERT_EQ(iter->key().ToString(), "key_aa");
8760
+ ASSERT_EQ(iter->value().ToString(), "v1");
8761
+ iter->Next();
8762
+ ASSERT_TRUE(iter->Valid());
8763
+ ASSERT_EQ(iter->key().ToString(), "key_bb");
8764
+ ASSERT_EQ(iter->value().ToString(), "v1");
8765
+ iter->Next();
8766
+ ASSERT_FALSE(iter->Valid());
8767
+ ASSERT_OK(iter->status());
8768
+ }
8769
+
8770
+ db->ReleaseSnapshot(snap);
8771
+ ASSERT_OK(db->Close());
8772
+ ASSERT_OK(DestroyDB(dbname, options_));
8097
8773
  }
8098
8774
 
8099
8775
  TEST_P(UserDefinedIndexTest, IngestTest) {