@nxtedition/rocksdb 15.4.1 → 16.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (401)
  1. package/binding.cc +70 -23
  2. package/deps/rocksdb/rocksdb/.clang-tidy +86 -0
  3. package/deps/rocksdb/rocksdb/BUCK +42 -0
  4. package/deps/rocksdb/rocksdb/CMakeLists.txt +11 -0
  5. package/deps/rocksdb/rocksdb/Makefile +59 -32
  6. package/deps/rocksdb/rocksdb/cache/cache.cc +0 -5
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +9 -9
  8. package/deps/rocksdb/rocksdb/cache/cache_key.cc +3 -3
  9. package/deps/rocksdb/rocksdb/cache/cache_key.h +5 -5
  10. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +16 -16
  11. package/deps/rocksdb/rocksdb/cache/cache_test.cc +1 -1
  12. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +258 -294
  13. package/deps/rocksdb/rocksdb/cache/clock_cache.h +98 -49
  14. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -5
  15. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +2 -3
  16. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +18 -18
  17. package/deps/rocksdb/rocksdb/crash_test.mk +5 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -22
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +6 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +14 -16
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +38 -26
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +101 -18
  24. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +12 -0
  25. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +6 -9
  26. package/deps/rocksdb/rocksdb/db/builder.cc +23 -0
  27. package/deps/rocksdb/rocksdb/db/builder.h +7 -0
  28. package/deps/rocksdb/rocksdb/db/c.cc +373 -57
  29. package/deps/rocksdb/rocksdb/db/c_test.c +101 -1
  30. package/deps/rocksdb/rocksdb/db/column_family.cc +31 -3
  31. package/deps/rocksdb/rocksdb/db/column_family_test.cc +10 -13
  32. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +35 -48
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +13 -5
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +201 -39
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -10
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +7 -7
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -455
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +19 -0
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +72 -9
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -10
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +405 -83
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +25 -1
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -10
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1410 -106
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -5
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +2 -1
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +19 -10
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +505 -45
  51. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
  52. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +9 -1
  53. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +4 -4
  54. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +7 -9
  55. package/deps/rocksdb/rocksdb/db/convenience.cc +4 -4
  56. package/deps/rocksdb/rocksdb/db/convenience_impl.h +2 -1
  57. package/deps/rocksdb/rocksdb/db/corruption_test.cc +60 -88
  58. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +10 -12
  59. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +471 -40
  60. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +116 -2
  61. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +5 -15
  62. package/deps/rocksdb/rocksdb/db/db_compaction_abort_test.cc +993 -0
  63. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +329 -29
  64. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +155 -13
  65. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +54 -31
  66. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -0
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +232 -70
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +57 -9
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +224 -31
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +4 -2
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +164 -8
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -0
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +5 -0
  77. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +47 -35
  78. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +22 -9
  79. package/deps/rocksdb/rocksdb/db/db_iter.cc +9 -0
  80. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +371 -6
  81. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -5
  82. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +22 -23
  83. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +0 -2
  84. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +4 -4
  85. package/deps/rocksdb/rocksdb/db/db_options_test.cc +40 -0
  86. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +32 -13
  87. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1 -1
  88. package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +4 -4
  89. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +68 -15
  90. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1 -1
  91. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +2 -3
  92. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -21
  93. package/deps/rocksdb/rocksdb/db/db_test.cc +644 -128
  94. package/deps/rocksdb/rocksdb/db/db_test2.cc +198 -81
  95. package/deps/rocksdb/rocksdb/db/db_test_util.cc +35 -10
  96. package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -2
  97. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +36 -32
  98. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +11 -7
  99. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +499 -0
  100. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +284 -20
  101. package/deps/rocksdb/rocksdb/db/db_write_test.cc +3 -3
  102. package/deps/rocksdb/rocksdb/db/dbformat.h +0 -5
  103. package/deps/rocksdb/rocksdb/db/error_handler.cc +24 -0
  104. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +12 -14
  105. package/deps/rocksdb/rocksdb/db/experimental.cc +13 -10
  106. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -1
  107. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +22 -3
  108. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +21 -15
  109. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +4 -6
  110. package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -3
  111. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +5 -6
  112. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +4 -2
  113. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +17 -17
  114. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -0
  115. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  116. package/deps/rocksdb/rocksdb/db/listener_test.cc +154 -27
  117. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -6
  118. package/deps/rocksdb/rocksdb/db/memtable.cc +197 -51
  119. package/deps/rocksdb/rocksdb/db/memtable.h +6 -0
  120. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +3 -4
  121. package/deps/rocksdb/rocksdb/db/merge_test.cc +37 -35
  122. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +2 -1
  123. package/deps/rocksdb/rocksdb/db/options_file_test.cc +4 -4
  124. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +9 -11
  125. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +10 -1
  126. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +292 -15
  127. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +10 -17
  128. package/deps/rocksdb/rocksdb/db/prefix_test.cc +6 -8
  129. package/deps/rocksdb/rocksdb/db/repair.cc +10 -10
  130. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -5
  131. package/deps/rocksdb/rocksdb/db/table_cache.cc +142 -135
  132. package/deps/rocksdb/rocksdb/db/table_cache.h +30 -6
  133. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -7
  134. package/deps/rocksdb/rocksdb/db/version_builder.cc +11 -50
  135. package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
  136. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +2 -1
  137. package/deps/rocksdb/rocksdb/db/version_edit.cc +51 -2
  138. package/deps/rocksdb/rocksdb/db/version_edit.h +91 -29
  139. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -7
  140. package/deps/rocksdb/rocksdb/db/version_set.cc +211 -50
  141. package/deps/rocksdb/rocksdb/db/version_set.h +40 -3
  142. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
  143. package/deps/rocksdb/rocksdb/db/version_set_test.cc +294 -21
  144. package/deps/rocksdb/rocksdb/db/version_util.cc +96 -0
  145. package/deps/rocksdb/rocksdb/db/version_util.h +24 -0
  146. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +5 -5
  147. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +647 -31
  148. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +219 -1
  149. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +549 -12
  150. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -3
  151. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +1 -1
  152. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -0
  153. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +21 -4
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +32 -0
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +74 -22
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +9 -0
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -61
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -2
  159. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +76 -2
  160. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +92 -72
  161. package/deps/rocksdb/rocksdb/env/env.cc +1 -0
  162. package/deps/rocksdb/rocksdb/env/env_test.cc +365 -2
  163. package/deps/rocksdb/rocksdb/env/fs_posix.cc +31 -30
  164. package/deps/rocksdb/rocksdb/env/io_posix.cc +8 -11
  165. package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
  166. package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
  167. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -1
  168. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +108 -0
  169. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +32 -4
  170. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +4 -4
  171. package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
  172. package/deps/rocksdb/rocksdb/file/file_util.h +2 -1
  173. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +331 -12
  174. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +52 -35
  175. package/deps/rocksdb/rocksdb/folly.mk +22 -5
  176. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +1 -1
  177. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +100 -54
  178. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +67 -2
  179. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +149 -13
  180. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -12
  181. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +78 -97
  182. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +3 -3
  183. package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +2 -2
  184. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +5 -0
  185. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +17 -2
  186. package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +1 -1
  187. package/deps/rocksdb/rocksdb/include/rocksdb/io_dispatcher.h +358 -0
  188. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +13 -0
  189. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +43 -0
  190. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +20 -0
  191. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +63 -21
  192. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +10 -1
  193. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +1 -1
  194. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +2 -7
  195. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +13 -0
  196. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -14
  197. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +49 -9
  198. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +8 -0
  199. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +77 -6
  200. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +15 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +16 -10
  202. package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +5 -5
  203. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +2 -4
  204. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +106 -46
  205. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +1 -1
  206. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +14 -1
  207. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +5 -1
  208. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +2 -1
  209. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -9
  210. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  211. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -2
  212. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +2 -2
  213. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +226 -8
  214. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +490 -0
  215. package/deps/rocksdb/rocksdb/memtable/skiplist.h +3 -3
  216. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +11 -0
  217. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +4 -12
  218. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +5 -5
  219. package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +21 -4
  220. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +9 -3
  221. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +21 -2
  222. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +2 -2
  223. package/deps/rocksdb/rocksdb/options/cf_options.cc +21 -1
  224. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  225. package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -2
  226. package/deps/rocksdb/rocksdb/options/db_options.cc +26 -5
  227. package/deps/rocksdb/rocksdb/options/db_options.h +3 -1
  228. package/deps/rocksdb/rocksdb/options/options.cc +5 -1
  229. package/deps/rocksdb/rocksdb/options/options_helper.cc +7 -2
  230. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +109 -103
  231. package/deps/rocksdb/rocksdb/options/options_test.cc +14 -0
  232. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +15 -17
  233. package/deps/rocksdb/rocksdb/port/lang.h +4 -0
  234. package/deps/rocksdb/rocksdb/port/port_example.h +0 -23
  235. package/deps/rocksdb/rocksdb/port/stack_trace.cc +36 -0
  236. package/deps/rocksdb/rocksdb/port/stack_trace.h +9 -0
  237. package/deps/rocksdb/rocksdb/src.mk +12 -0
  238. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +1 -2
  239. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  240. package/deps/rocksdb/rocksdb/table/block_based/block.cc +571 -292
  241. package/deps/rocksdb/rocksdb/table/block_based/block.h +143 -53
  242. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +154 -90
  243. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +5 -1
  244. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +51 -14
  245. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +0 -2
  246. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +147 -734
  247. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +30 -233
  248. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +178 -108
  249. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +13 -0
  250. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +17 -4
  251. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -2
  252. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +70 -0
  253. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +168 -24
  254. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -9
  255. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +7 -4
  256. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +9 -2
  257. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +548 -169
  258. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
  259. package/deps/rocksdb/rocksdb/table/block_based/block_util.h +156 -0
  260. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +73 -30
  261. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +74 -7
  262. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +1 -1
  263. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +20 -14
  264. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +22 -12
  265. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +1 -1
  266. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.cc +332 -0
  267. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.h +133 -0
  268. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -2
  269. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
  270. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +3 -2
  271. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +4 -1
  272. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +0 -1
  273. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +126 -46
  274. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +31 -3
  275. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +1 -2
  276. package/deps/rocksdb/rocksdb/table/cleanable_test.cc +3 -1
  277. package/deps/rocksdb/rocksdb/table/external_table.cc +25 -4
  278. package/deps/rocksdb/rocksdb/table/format.cc +27 -15
  279. package/deps/rocksdb/rocksdb/table/format.h +41 -15
  280. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -0
  281. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +22 -12
  282. package/deps/rocksdb/rocksdb/table/meta_blocks.h +0 -1
  283. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +7 -21
  284. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
  285. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +88 -13
  286. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +53 -42
  287. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +3 -12
  288. package/deps/rocksdb/rocksdb/table/table_builder.h +0 -4
  289. package/deps/rocksdb/rocksdb/table/table_properties.cc +18 -0
  290. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -3
  291. package/deps/rocksdb/rocksdb/table/table_test.cc +848 -172
  292. package/deps/rocksdb/rocksdb/table/unique_id.cc +24 -20
  293. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +8 -8
  294. package/deps/rocksdb/rocksdb/test_util/sync_point.h +5 -4
  295. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -1
  296. package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -2
  297. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +2 -1
  298. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +238 -120
  299. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +2 -2
  300. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +2 -4
  301. package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +4 -8
  302. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +1 -1
  303. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +2 -3
  304. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +82 -20
  305. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +41 -47
  306. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -0
  307. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +5 -6
  308. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +1 -1
  309. package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +6 -5
  310. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +4 -4
  311. package/deps/rocksdb/rocksdb/tools/write_stress.cc +1 -3
  312. package/deps/rocksdb/rocksdb/util/atomic.h +30 -23
  313. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +6 -7
  314. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +3 -3
  315. package/deps/rocksdb/rocksdb/util/bit_fields.h +68 -46
  316. package/deps/rocksdb/rocksdb/util/bloom_impl.h +16 -16
  317. package/deps/rocksdb/rocksdb/util/coding.h +14 -27
  318. package/deps/rocksdb/rocksdb/util/compression.cc +365 -207
  319. package/deps/rocksdb/rocksdb/util/compression.h +16 -1298
  320. package/deps/rocksdb/rocksdb/util/compression_test.cc +347 -61
  321. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +8 -9
  322. package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +1 -1
  323. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +1 -1
  324. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +3 -3
  325. package/deps/rocksdb/rocksdb/util/filter_bench.cc +18 -18
  326. package/deps/rocksdb/rocksdb/util/gflags_compat.h +3 -3
  327. package/deps/rocksdb/rocksdb/util/hash_test.cc +19 -7
  328. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.cc +1099 -0
  329. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.h +36 -0
  330. package/deps/rocksdb/rocksdb/util/io_dispatcher_test.cc +1919 -0
  331. package/deps/rocksdb/rocksdb/util/math.h +3 -1
  332. package/deps/rocksdb/rocksdb/util/mutexlock.h +19 -19
  333. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +25 -25
  334. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +5 -7
  335. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -5
  336. package/deps/rocksdb/rocksdb/util/slice.cc +0 -10
  337. package/deps/rocksdb/rocksdb/util/slice_test.cc +35 -1
  338. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +5 -7
  339. package/deps/rocksdb/rocksdb/util/status.cc +3 -1
  340. package/deps/rocksdb/rocksdb/util/stop_watch.h +2 -0
  341. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -1
  342. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +123 -78
  343. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +12 -93
  344. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -4
  345. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +0 -21
  346. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +6 -48
  347. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +94 -307
  348. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +12 -58
  349. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +2 -8
  350. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +2 -3
  351. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +205 -811
  352. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +18 -9
  353. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +2 -7
  354. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +1 -9
  355. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +17 -11
  356. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +1 -1
  357. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +1 -1
  358. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
  359. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +68 -61
  360. package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -1
  361. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +105 -59
  362. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -7
  363. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs_test.cc +94 -0
  364. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +13 -17
  365. package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +16 -3
  366. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +25 -25
  367. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +40 -40
  368. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +2 -5
  369. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +17 -19
  370. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +2 -2
  371. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +2 -2
  372. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +1 -1
  373. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +2 -2
  374. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +4 -13
  375. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +3 -3
  376. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
  377. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_seqno_test.cc +431 -0
  378. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -2
  379. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +91 -0
  380. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.cc +562 -0
  381. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.h +615 -0
  382. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.cc +2575 -0
  383. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.h +685 -0
  384. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_db_test.cc +2843 -0
  385. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.cc +567 -0
  386. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.h +275 -0
  387. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_test.cc +5183 -0
  388. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +4 -3
  389. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  390. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
  391. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +3 -3
  392. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +93 -88
  393. package/deps/rocksdb/rocksdb.gyp +7 -0
  394. package/index.js +70 -10
  395. package/iterator.js +25 -3
  396. package/max_rev_operator.h +9 -5
  397. package/package.json +1 -1
  398. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  399. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  400. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +0 -43
  401. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +0 -55
@@ -9,6 +9,7 @@
9
9
  #include <string>
10
10
  #include <unordered_map>
11
11
 
12
+ #include "db/seqno_to_time_mapping.h"
12
13
  #include "rocksdb/slice.h"
13
14
  #include "rocksdb/status.h"
14
15
  #include "rocksdb/user_defined_index.h"
@@ -43,34 +44,48 @@ class UserDefinedIndexBuilderWrapper : public IndexBuilder {
43
44
  const BlockHandle& block_handle,
44
45
  std::string* separator_scratch,
45
46
  bool skip_delta_encoding) override {
46
- UserDefinedIndexBuilder::BlockHandle handle;
47
+ UserDefinedIndexBuilder::BlockHandle handle{};
47
48
  handle.offset = block_handle.offset();
48
49
  handle.size = block_handle.size();
49
- // Forward the call to both index builders
50
+ // Forward the call to both index builders.
51
+ // Parse the internal keys to extract user keys and sequence numbers.
52
+ // There's no way to return an error here, so we remember the status and
53
+ // return it in Finish().
50
54
  ParsedInternalKey pkey_last;
51
55
  ParsedInternalKey pkey_first;
52
- // There's no way to return an error here, so we remember the statsu and
53
- // return it in Finish()
54
56
  if (status_.ok()) {
55
57
  status_ = ParseInternalKey(last_key_in_current_block, &pkey_last,
56
- /*lof_err_key*/ false);
58
+ /*log_err_key*/ false);
57
59
  }
58
60
  if (status_.ok() && first_key_in_next_block) {
59
61
  status_ = ParseInternalKey(*first_key_in_next_block, &pkey_first,
60
- /*lof_err_key*/ false);
62
+ /*log_err_key*/ false);
61
63
  }
62
64
  if (status_.ok()) {
65
+ // Pass both user keys AND sequence numbers to the UDI builder via
66
+ // the IndexEntryContext. The sequence numbers are needed when the
67
+ // same user key spans a data block boundary (e.g., due to snapshots
68
+ // keeping multiple versions). Without sequence numbers, the UDI
69
+ // cannot produce a separator that distinguishes the two blocks,
70
+ // causing incorrect Seek results.
71
+ UserDefinedIndexBuilder::IndexEntryContext ctx;
72
+ ctx.last_key_seq = pkey_last.sequence;
73
+ ctx.first_key_seq = first_key_in_next_block ? pkey_first.sequence : 0;
63
74
  user_defined_index_builder_->AddIndexEntry(
64
75
  pkey_last.user_key,
65
76
  first_key_in_next_block ? &pkey_first.user_key : nullptr, handle,
66
- separator_scratch);
77
+ separator_scratch, ctx);
67
78
  }
68
79
  return internal_index_builder_->AddIndexEntry(
69
80
  last_key_in_current_block, first_key_in_next_block, block_handle,
70
81
  separator_scratch, skip_delta_encoding);
71
82
  }
72
83
 
73
- // Not supported with parallel compression
84
+ // Parallel compression splits AddIndexEntry() into PrepareIndexEntry() (emit
85
+ // thread) and FinishIndexEntry() (worker thread). This wrapper does not
86
+ // implement that split yet, so parallel compression is rejected at option
87
+ // validation time (see BlockBasedTableFactory::ValidateOptions and the Rep
88
+ // constructor). These stubs exist only to satisfy the interface.
74
89
  std::unique_ptr<PreparedIndexEntry> CreatePreparedIndexEntry() override {
75
90
  return nullptr;
76
91
  }
@@ -93,35 +108,43 @@ class UserDefinedIndexBuilderWrapper : public IndexBuilder {
93
108
 
94
109
  void OnKeyAdded(const Slice& key,
95
110
  const std::optional<Slice>& value) override {
111
+ // Always forward to internal index builder first. It relies on receiving
112
+ // OnKeyAdded for every key to maintain state (e.g.,
113
+ // current_block_first_internal_key_) needed by AddIndexEntry, which is
114
+ // always forwarded regardless of UDI status.
115
+ internal_index_builder_->OnKeyAdded(key, value);
116
+
96
117
  ParsedInternalKey pkey;
97
118
  if (status_.ok()) {
119
+ // Defensive: value should always be present since OnKeyAdded() is called
120
+ // on the main thread in Add() with the original value Slice. No current
121
+ // code path passes std::nullopt here.
98
122
  if (!value.has_value()) {
123
+ assert(false);
99
124
  status_ = Status::InvalidArgument(
100
- "user_defined_index_factory not supported with parallel "
101
- "compression");
125
+ "OnKeyAdded called without a value; UDI requires the value to "
126
+ "forward to the plugin builder");
102
127
  } else {
103
- status_ = ParseInternalKey(key, &pkey, /*lof_err_key*/ false);
104
- if (status_.ok() && pkey.type != ValueType::kTypeValue) {
105
- status_ = Status::InvalidArgument(
106
- "user_defined_index_factory only supported with Puts");
107
- }
128
+ status_ = ParseInternalKey(key, &pkey, /*log_err_key*/ false);
108
129
  }
109
130
  }
110
131
  if (!status_.ok()) {
111
132
  return;
112
133
  }
113
134
 
114
- // Forward the call to both index builders
115
- internal_index_builder_->OnKeyAdded(key, value);
116
-
117
- // Pass the user key to the UDI. We don't expect multiple entries with
118
- // different sequence numbers for the same key in the file. RocksDB may
119
- // enforce it in the future by allowing UDIs only for read only
120
- // bulkloaded use cases, and only allow ingestion of files with
121
- // sequence number 0.
135
+ // Pass the user key to the UDI with the mapped value type. In SST files
136
+ // produced by flush or compaction, there may be multiple entries for the
137
+ // same user key with different sequence numbers (e.g., when snapshots are
138
+ // active). UDI builders that use OnKeyAdded() should handle this; builders
139
+ // that only use AddIndexEntry() separator keys (e.g., trie) are unaffected.
140
+ Slice udi_value = value.value();
141
+ if (pkey.type == kTypeValuePreferredSeqno) {
142
+ // Strip the packed preferred seqno suffix so the UDI plugin receives
143
+ // only the user value, consistent with the kValue contract.
144
+ udi_value = ParsePackedValueForValue(udi_value);
145
+ }
122
146
  user_defined_index_builder_->OnKeyAdded(
123
- pkey.user_key, UserDefinedIndexBuilder::ValueType::kValue,
124
- value.value());
147
+ pkey.user_key, MapToUDIValueType(pkey.type), udi_value);
125
148
  }
126
149
 
127
150
  Status Finish(IndexBlocks* index_blocks,
@@ -158,13 +181,39 @@ class UserDefinedIndexBuilderWrapper : public IndexBuilder {
158
181
 
159
182
  size_t IndexSize() const override { return index_size_; }
160
183
 
161
- uint64_t CurrentIndexSizeEstimate() const override { return 0; }
184
+ uint64_t CurrentIndexSizeEstimate() const override {
185
+ return internal_index_builder_->CurrentIndexSizeEstimate();
186
+ }
162
187
 
163
188
  bool separator_is_key_plus_seq() override {
164
189
  return internal_index_builder_->separator_is_key_plus_seq();
165
190
  }
166
191
 
167
192
  private:
193
+ static UserDefinedIndexBuilder::ValueType MapToUDIValueType(
194
+ ROCKSDB_NAMESPACE::ValueType t) {
195
+ switch (t) {
196
+ case kTypeValue:
197
+ case kTypeValuePreferredSeqno:
198
+ return UserDefinedIndexBuilder::kValue;
199
+ case kTypeDeletion:
200
+ case kTypeSingleDeletion:
201
+ case kTypeDeletionWithTimestamp:
202
+ return UserDefinedIndexBuilder::kDelete;
203
+ case kTypeMerge:
204
+ return UserDefinedIndexBuilder::kMerge;
205
+ case kTypeBlobIndex:
206
+ case kTypeWideColumnEntity:
207
+ return UserDefinedIndexBuilder::kOther;
208
+ default:
209
+ // Any new type that reaches OnKeyAdded() should be explicitly mapped
210
+ // above. Falling through to kOther is a safe default but indicates a
211
+ // missing case that should be added.
212
+ assert(false);
213
+ return UserDefinedIndexBuilder::kOther;
214
+ }
215
+ }
216
+
168
217
  const std::string name_;
169
218
  std::unique_ptr<IndexBuilder> internal_index_builder_;
170
219
  std::unique_ptr<UserDefinedIndexBuilder> user_defined_index_builder_;
@@ -179,13 +228,24 @@ class UserDefinedIndexIteratorWrapper
179
228
  std::unique_ptr<UserDefinedIndexIterator>&& udi_iter)
180
229
  : udi_iter_(std::move(udi_iter)), valid_(false) {}
181
230
 
231
+ ~UserDefinedIndexIteratorWrapper() override = default;
232
+
182
233
  bool Valid() const override { return valid_; }
183
234
 
184
235
  void SeekToFirst() override {
185
- status_ = Status::NotSupported("SeekToFirst not supported");
236
+ status_ = udi_iter_->SeekToFirstAndGetResult(&result_);
237
+ if (status_.ok()) {
238
+ valid_ = result_.bound_check_result == IterBoundCheck::kInbound;
239
+ if (valid_) {
240
+ SetInternalKeyFromUDIResult();
241
+ }
242
+ } else {
243
+ valid_ = false;
244
+ }
186
245
  }
187
246
 
188
247
  void SeekToLast() override {
248
+ valid_ = false;
189
249
  status_ = Status::NotSupported("SeekToLast not supported");
190
250
  }
191
251
 
@@ -193,12 +253,19 @@ class UserDefinedIndexIteratorWrapper
193
253
  ParsedInternalKey pkey;
194
254
  status_ = ParseInternalKey(target, &pkey, /*log_err_key=*/false);
195
255
  if (status_.ok()) {
196
- status_ = udi_iter_->SeekAndGetResult(pkey.user_key, &result_);
256
+ // Pass both user key AND sequence number to the UDI iterator via
257
+ // SeekContext. The sequence number is needed when the same user key
258
+ // spans multiple data blocks with different sequence numbers (e.g.,
259
+ // due to snapshots). Without it, the UDI cannot distinguish which
260
+ // block to return for a given (user_key, seqno) target.
261
+ UserDefinedIndexIterator::SeekContext ctx;
262
+ ctx.target_seq = pkey.sequence;
263
+ status_ = udi_iter_->SeekAndGetResult(pkey.user_key, &result_, ctx);
197
264
  }
198
265
  if (status_.ok()) {
199
266
  valid_ = result_.bound_check_result == IterBoundCheck::kInbound;
200
267
  if (valid_) {
201
- ikey_.Set(result_.key, 0, ValueType::kTypeValue);
268
+ SetInternalKeyFromUDIResult();
202
269
  }
203
270
  } else {
204
271
  valid_ = false;
@@ -210,7 +277,7 @@ class UserDefinedIndexIteratorWrapper
210
277
  if (status_.ok()) {
211
278
  valid_ = result_.bound_check_result == IterBoundCheck::kInbound;
212
279
  if (valid_) {
213
- ikey_.Set(result_.key, 0, ValueType::kTypeValue);
280
+ SetInternalKeyFromUDIResult();
214
281
  }
215
282
  } else {
216
283
  valid_ = false;
@@ -222,11 +289,11 @@ class UserDefinedIndexIteratorWrapper
222
289
  if (status_.ok()) {
223
290
  valid_ = result_.bound_check_result == IterBoundCheck::kInbound;
224
291
  if (valid_) {
225
- ikey_.Set(result_.key, 0, ValueType::kTypeValue);
226
- }
227
- if (status_.ok()) {
228
- *result = result_;
292
+ SetInternalKeyFromUDIResult();
293
+ result->key = key();
229
294
  }
295
+ result->bound_check_result = result_.bound_check_result;
296
+ result->value_prepared = result_.value_prepared;
230
297
  } else {
231
298
  valid_ = false;
232
299
  }
@@ -234,10 +301,14 @@ class UserDefinedIndexIteratorWrapper
234
301
  }
235
302
 
236
303
  void SeekForPrev(const Slice& /*target*/) override {
304
+ valid_ = false;
237
305
  status_ = Status::NotSupported("SeekForPrev not supported");
238
306
  }
239
307
 
240
- void Prev() override { status_ = Status::NotSupported("Prev not supported"); }
308
+ void Prev() override {
309
+ valid_ = false;
310
+ status_ = Status::NotSupported("Prev not supported");
311
+ }
241
312
 
242
313
  Slice key() const override { return Slice(*ikey_.const_rep()); }
243
314
 
@@ -261,6 +332,17 @@ class UserDefinedIndexIteratorWrapper
261
332
  }
262
333
 
263
334
  private:
335
+ // Convert the UDI result's user key into an internal key for the index
336
+ // iterator contract. UDI separators are user keys, but
337
+ // InternalIteratorBase<IndexValue> must expose internal keys (user key +
338
+ // 8-byte trailer). We use seq=0 / kTypeValue so that the resulting
339
+ // internal key compares as "greater than or equal to" any real data key
340
+ // with the same user key (lower seqno = later in internal key order),
341
+ // which is the correct upper-bound semantics for an index separator.
342
+ void SetInternalKeyFromUDIResult() {
343
+ ikey_.Set(result_.key, 0, ValueType::kTypeValue);
344
+ }
345
+
264
346
  std::unique_ptr<UserDefinedIndexIterator> udi_iter_;
265
347
  IterateResult result_;
266
348
  InternalKey ikey_;
@@ -278,7 +360,7 @@ class UserDefinedIndexReaderWrapper : public BlockBasedTable::IndexReader {
278
360
  reader_(std::move(reader)),
279
361
  udi_reader_(std::move(udi_reader)) {}
280
362
 
281
- virtual InternalIteratorBase<IndexValue>* NewIterator(
363
+ InternalIteratorBase<IndexValue>* NewIterator(
282
364
  const ReadOptions& read_options, bool disable_prefix_seek,
283
365
  IndexBlockIter* iter, GetContext* get_context,
284
366
  BlockCacheLookupContext* lookup_context) override {
@@ -288,32 +370,30 @@ class UserDefinedIndexReaderWrapper : public BlockBasedTable::IndexReader {
288
370
  }
289
371
  if (name_ != read_options.table_index_factory->Name()) {
290
372
  return NewErrorInternalIterator<IndexValue>(Status::InvalidArgument(
291
- "Bad index name" +
373
+ "Bad index name: " +
292
374
  std::string(read_options.table_index_factory->Name()) +
293
375
  ". Only supported UDI is " + name_));
294
376
  }
295
377
  std::unique_ptr<UserDefinedIndexIterator> udi_iter =
296
378
  udi_reader_->NewIterator(read_options);
297
379
  if (udi_iter) {
298
- InternalIteratorBase<IndexValue>* wrap_iter =
299
- new UserDefinedIndexIteratorWrapper(std::move(udi_iter));
300
- return wrap_iter;
380
+ return new UserDefinedIndexIteratorWrapper(std::move(udi_iter));
301
381
  }
302
382
  return NewErrorInternalIterator<IndexValue>(
303
- Status::NotFound("COuld not create UDI iterator"));
383
+ Status::NotFound("Could not create UDI iterator"));
304
384
  }
305
385
 
306
- virtual Status CacheDependencies(
307
- const ReadOptions& ro, bool pin,
308
- FilePrefetchBuffer* tail_prefetch_buffer) override {
386
+ Status CacheDependencies(const ReadOptions& ro, bool pin,
387
+ FilePrefetchBuffer* tail_prefetch_buffer) override {
309
388
  return reader_->CacheDependencies(ro, pin, tail_prefetch_buffer);
310
389
  }
311
390
 
312
391
  size_t ApproximateMemoryUsage() const override {
313
- return reader_->ApproximateMemoryUsage();
392
+ return reader_->ApproximateMemoryUsage() +
393
+ udi_reader_->ApproximateMemoryUsage();
314
394
  }
315
395
 
316
- virtual void EraseFromCacheBeforeDestruction(
396
+ void EraseFromCacheBeforeDestruction(
317
397
  uint32_t uncache_aggressiveness) override {
318
398
  reader_->EraseFromCacheBeforeDestruction(uncache_aggressiveness);
319
399
  }
@@ -29,13 +29,40 @@
29
29
 
30
30
  namespace ROCKSDB_NAMESPACE {
31
31
 
32
+ namespace {
33
+
34
+ inline void RecordBlockReadBytePerfCounter(BlockType block_type,
35
+ uint64_t block_size_with_trailer) {
36
+ switch (block_type) {
37
+ case BlockType::kData:
38
+ PERF_COUNTER_ADD(data_block_read_byte, block_size_with_trailer);
39
+ break;
40
+ case BlockType::kFilter:
41
+ case BlockType::kFilterPartitionIndex:
42
+ PERF_COUNTER_ADD(filter_block_read_byte, block_size_with_trailer);
43
+ break;
44
+ case BlockType::kCompressionDictionary:
45
+ PERF_COUNTER_ADD(compression_dict_block_read_byte,
46
+ block_size_with_trailer);
47
+ break;
48
+ case BlockType::kIndex:
49
+ PERF_COUNTER_ADD(index_block_read_byte, block_size_with_trailer);
50
+ break;
51
+ default:
52
+ PERF_COUNTER_ADD(metadata_block_read_byte, block_size_with_trailer);
53
+ break;
54
+ }
55
+ }
56
+
57
+ } // namespace
58
+
32
59
  inline void BlockFetcher::ProcessTrailerIfPresent() {
33
60
  if (footer_.GetBlockTrailerSize() > 0) {
34
61
  assert(footer_.GetBlockTrailerSize() == BlockBasedTable::kBlockTrailerSize);
35
62
  if (read_options_.verify_checksums) {
36
- io_status_ = status_to_io_status(
37
- VerifyBlockChecksum(footer_, slice_.data(), block_size_,
38
- file_->file_name(), handle_.offset()));
63
+ io_status_ = status_to_io_status(VerifyBlockChecksum(
64
+ footer_, slice_.data(), block_size_, file_->file_name(),
65
+ handle_.offset(), block_type_));
39
66
  RecordTick(ioptions_.stats, BLOCK_CHECKSUM_COMPUTE_COUNT);
40
67
  if (!io_status_.ok()) {
41
68
  assert(io_status_.IsCorruption());
@@ -324,6 +351,7 @@ void BlockFetcher::ReadBlock(bool retry) {
324
351
  }
325
352
 
326
353
  PERF_COUNTER_ADD(block_read_byte, block_size_with_trailer_);
354
+ RecordBlockReadBytePerfCounter(block_type_, block_size_with_trailer_);
327
355
  IGNORE_STATUS_IF_ERROR(io_status_);
328
356
  if (io_status_.ok()) {
329
357
  if (use_fs_scratch_ && !read_req.status.ok()) {
@@ -319,8 +319,7 @@ class BlockFetcherTest : public testing::Test {
319
319
  PersistentCacheOptions persistent_cache_options;
320
320
  Footer footer;
321
321
  ReadFooter(file, &footer);
322
- auto mgr = GetBuiltinCompressionManager(
323
- GetCompressFormatForVersion(footer.format_version()));
322
+ auto mgr = GetBuiltinV2CompressionManager();
324
323
  std::unique_ptr<BlockFetcher> fetcher(new BlockFetcher(
325
324
  file, nullptr /* prefetch_buffer */, footer, roptions, block, contents,
326
325
  ioptions, do_uncompress, compressed, block_type,
@@ -31,7 +31,9 @@ void Multiplier(void* arg1, void* arg2) {
31
31
  TEST_F(CleanableTest, Register) {
32
32
  int n2 = 2, n3 = 3;
33
33
  int res = 1;
34
- { Cleanable c1; }
34
+ {
35
+ Cleanable c1;
36
+ }
35
37
  // ~Cleanable
36
38
  ASSERT_EQ(1, res);
37
39
 
@@ -8,6 +8,7 @@
8
8
  #include "logging/logging.h"
9
9
  #include "rocksdb/table.h"
10
10
  #include "table/block_based/block.h"
11
+ #include "table/get_context.h"
11
12
  #include "table/internal_iterator.h"
12
13
  #include "table/meta_blocks.h"
13
14
  #include "table/table_builder.h"
@@ -233,10 +234,30 @@ class ExternalTableReaderAdapter : public TableReader {
233
234
 
234
235
  size_t ApproximateMemoryUsage() const override { return 0; }
235
236
 
236
- Status Get(const ReadOptions&, const Slice&, GetContext*,
237
- const SliceTransform*, bool = false) override {
238
- return Status::NotSupported(
239
- "Get() not supported on external file iterator");
237
+ Status Get(const ReadOptions& read_options, const Slice& key,
238
+ GetContext* get_context, const SliceTransform* prefix_extractor,
239
+ bool /*skip_filters*/ = false) override {
240
+ ParsedInternalKey parsed_key;
241
+ Status s = ParseInternalKey(key, &parsed_key, /*log_err_key=*/false);
242
+ if (!s.ok()) {
243
+ return s;
244
+ }
245
+
246
+ PinnableSlice value;
247
+ s = reader_->Get(read_options, parsed_key.user_key, prefix_extractor,
248
+ &value);
249
+ if (!s.ok()) {
250
+ if (s.IsNotFound()) {
251
+ return Status::OK();
252
+ }
253
+ return s;
254
+ }
255
+
256
+ ParsedInternalKey found_key(parsed_key.user_key, 0, ValueType::kTypeValue);
257
+ bool matched = false;
258
+ get_context->SaveValue(found_key, value, &matched, &s,
259
+ value.IsPinned() ? &value : nullptr);
260
+ return s;
240
261
  }
241
262
 
242
263
  Status VerifyChecksum(const ReadOptions& /*ro*/, TableReaderCaller /*caller*/,
@@ -154,23 +154,18 @@ std::string IndexValue::ToString(bool hex, bool have_first_key) const {
154
154
 
155
155
  namespace {
156
156
  inline bool IsLegacyFooterFormat(uint64_t magic_number) {
157
- return magic_number == kLegacyBlockBasedTableMagicNumber ||
158
- magic_number == kLegacyPlainTableMagicNumber;
157
+ return magic_number == kLegacyPlainTableMagicNumber;
159
158
  }
159
+ // Used when reading format_version=0 footers (plain tables)
160
160
  inline uint64_t UpconvertLegacyFooterFormat(uint64_t magic_number) {
161
- if (magic_number == kLegacyBlockBasedTableMagicNumber) {
162
- return kBlockBasedTableMagicNumber;
163
- }
164
161
  if (magic_number == kLegacyPlainTableMagicNumber) {
165
162
  return kPlainTableMagicNumber;
166
163
  }
167
164
  assert(false);
168
165
  return magic_number;
169
166
  }
167
+ // Used by plain tables to write format_version=0 footers
170
168
  inline uint64_t DownconvertToLegacyFooterFormat(uint64_t magic_number) {
171
- if (magic_number == kBlockBasedTableMagicNumber) {
172
- return kLegacyBlockBasedTableMagicNumber;
173
- }
174
169
  if (magic_number == kPlainTableMagicNumber) {
175
170
  return kLegacyPlainTableMagicNumber;
176
171
  }
@@ -178,14 +173,18 @@ inline uint64_t DownconvertToLegacyFooterFormat(uint64_t magic_number) {
178
173
  return magic_number;
179
174
  }
180
175
  inline uint8_t BlockTrailerSizeForMagicNumber(uint64_t magic_number) {
181
- if (magic_number == kBlockBasedTableMagicNumber ||
182
- magic_number == kLegacyBlockBasedTableMagicNumber) {
176
+ if (magic_number == kBlockBasedTableMagicNumber) {
183
177
  return static_cast<uint8_t>(BlockBasedTable::kBlockTrailerSize);
184
178
  } else {
185
179
  return 0;
186
180
  }
187
181
  }
188
182
 
183
+ // NOTE: format_version 0 is still used by plain tables and format_version 1 by
184
+ // cuckoo table. For block-based tables, format_version < 2 is no longer
185
+ // supported for reading or writing. Legacy magic numbers on block-based tables
186
+ // are used only for good error reporting.
187
+ //
189
188
  // Footer format, in three parts:
190
189
  // * Part1
191
190
  // -> format_version == 0 (inferred from legacy magic number)
@@ -229,7 +228,7 @@ Status FooterBuilder::Build(uint64_t magic_number, uint32_t format_version,
229
228
  const BlockHandle& index_handle,
230
229
  uint32_t base_context_checksum) {
231
230
  assert(magic_number != Footer::kNullTableMagicNumber);
232
- assert(IsSupportedFormatVersion(format_version) ||
231
+ assert(IsSupportedFormatVersionForWrite(magic_number, format_version) ||
233
232
  TEST_AllowUnsupportedFormatVersion());
234
233
 
235
234
  char* part2;
@@ -251,6 +250,7 @@ Status FooterBuilder::Build(uint64_t magic_number, uint32_t format_version,
251
250
  EncodeFixed64(cur, magic_number);
252
251
  assert(cur + 8 == slice_.data() + slice_.size());
253
252
  } else {
253
+ // format_version == 0 is used by plain tables
254
254
  slice_ = Slice(data_.data(), Footer::kVersion0EncodedLength);
255
255
  // Legacy SST files use kCRC32c checksum but it's not stored in footer.
256
256
  assert(checksum_type == kNoChecksum || checksum_type == kCRC32c);
@@ -337,9 +337,18 @@ Status Footer::DecodeFrom(Slice input, uint64_t input_offset,
337
337
  const char* magic_ptr = input.data() + input.size() - kMagicNumberLengthByte;
338
338
  uint64_t magic = DecodeFixed64(magic_ptr);
339
339
 
340
- // We check for legacy formats here and silently upconvert them
340
+ // Legacy block-based tables (format_version < 2) are no longer supported.
341
+ // (This constant is only used here and in the corresponding test.)
342
+ if (magic == 0xdb4775248b80fb57ull) {
343
+ return Status::NotSupported(
344
+ "Unsupported legacy magic number for block-based SST format. Load with "
345
+ "RocksDB >= 4.6.0 and < 11.0.0 and run full compaction to upgrade.");
346
+ }
347
+
348
+ // Check for legacy formats
341
349
  bool legacy = IsLegacyFooterFormat(magic);
342
350
  if (legacy) {
351
+ // Legacy plain tables are still supported - upconvert magic
343
352
  magic = UpconvertLegacyFooterFormat(magic);
344
353
  }
345
354
  if (enforce_table_magic_number != 0 && enforce_table_magic_number != magic) {
@@ -355,6 +364,7 @@ Status Footer::DecodeFrom(Slice input, uint64_t input_offset,
355
364
  uint32_t computed_checksum = 0;
356
365
  uint64_t footer_offset = 0;
357
366
  if (legacy) {
367
+ // Legacy format (format_version=0, used by plain tables)
358
368
  // The size is already asserted to be at least kMinEncodedLength
359
369
  // at the beginning of the function
360
370
  input.remove_prefix(input.size() - kVersion0EncodedLength);
@@ -363,10 +373,11 @@ Status Footer::DecodeFrom(Slice input, uint64_t input_offset,
363
373
  } else {
364
374
  part3_ptr = magic_ptr - 4;
365
375
  format_version_ = DecodeFixed32(part3_ptr);
366
- if (UNLIKELY(!IsSupportedFormatVersion(format_version_) &&
376
+ if (UNLIKELY(!IsSupportedFormatVersionForRead(magic, format_version_) &&
367
377
  !TEST_AllowUnsupportedFormatVersion())) {
368
- return Status::Corruption("Corrupt or unsupported format_version: " +
369
- std::to_string(format_version_));
378
+ return Status::Corruption("Corrupt or unsupported format_version " +
379
+ std::to_string(format_version_) +
380
+ " for magic " + std::to_string(magic));
370
381
  }
371
382
  // All known format versions >= 1 occupy exactly this many bytes.
372
383
  if (UNLIKELY(input.size() < kNewVersionsEncodedLength)) {
@@ -701,6 +712,7 @@ Status DecompressBlockData(Decompressor::Args& args, Decompressor& decompressor,
701
712
  args.compressed_data.size());
702
713
  RecordTick(ioptions.stats, BYTES_DECOMPRESSED_TO, out_contents->data.size());
703
714
  RecordTick(ioptions.stats, NUMBER_BLOCK_DECOMPRESSED);
715
+ PERF_COUNTER_ADD(block_decompress_count, 1);
704
716
 
705
717
  TEST_SYNC_POINT_CALLBACK("DecompressBlockData:TamperWithReturnValue",
706
718
  static_cast<void*>(&s));
@@ -34,7 +34,6 @@ bool ShouldReportDetailedTime(Env* env, Statistics* stats);
34
34
  // the length of the magic number in bytes.
35
35
  constexpr uint32_t kMagicNumberLengthByte = 8;
36
36
 
37
- extern const uint64_t kLegacyBlockBasedTableMagicNumber;
38
37
  extern const uint64_t kBlockBasedTableMagicNumber;
39
38
 
40
39
  extern const uint64_t kLegacyPlainTableMagicNumber;
@@ -163,22 +162,49 @@ inline uint32_t ChecksumModifierForContext(uint32_t base_context_checksum,
163
162
  return modifier & all_or_nothing;
164
163
  }
165
164
 
166
- inline uint32_t GetCompressFormatForVersion(uint32_t format_version) {
167
- // As of format_version 2, we encode compressed block with
168
- // compress_format_version == 2. Before that, the version is 1.
169
- // DO NOT CHANGE THIS FUNCTION, it affects disk format
170
- // As of format_version 7 and opening up to custom compression, the
171
- // compression format version is essentially independent of the block-based
172
- // table format version, and encoded in the compression_name table property.
173
- // Thus, this function can go away once we remove support for reading
174
- // format_version=1.
175
- return format_version >= 2 ? 2 : 1;
176
- }
165
+ constexpr uint32_t kLatestBbtFormatVersion = 7;
177
166
 
178
- constexpr uint32_t kLatestFormatVersion = 7;
167
+ // Minimum format version supported for reading SST files in block-based format.
168
+ //
169
+ // When phasing out old format versions, first increase the write minimum,
170
+ // then later (>= 6 mo) increase the read minimum when removing the
171
+ // implementation for both read and write.
172
+ constexpr uint32_t kMinSupportedBbtFormatVersionForRead = 2;
179
173
 
180
- inline bool IsSupportedFormatVersion(uint32_t version) {
181
- return version <= kLatestFormatVersion;
174
+ // Minimum format version supported for writing new SST files in block-based
175
+ // format. This should be >= kMinSupportedBbtFormatVersionForRead.
176
+ //
177
+ // When phasing out old format versions, first increase the write minimum,
178
+ // then later (>= 6 mo) increase the read minimum when removing the
179
+ // implementation for both read and write.
180
+ constexpr uint32_t kMinSupportedBbtFormatVersionForWrite = 2;
181
+ static_assert(kMinSupportedBbtFormatVersionForWrite >=
182
+ kMinSupportedBbtFormatVersionForRead);
183
+
184
+ inline bool IsSupportedFormatVersionForRead(uint64_t magic, uint32_t version) {
185
+ if (magic == kBlockBasedTableMagicNumber) {
186
+ return version >= kMinSupportedBbtFormatVersionForRead &&
187
+ version <= kLatestBbtFormatVersion;
188
+ } else if (magic == kPlainTableMagicNumber) {
189
+ return version == 0;
190
+ } else if (magic == kCuckooTableMagicNumber) {
191
+ return version == 1;
192
+ } else {
193
+ return false;
194
+ }
195
+ }
196
+
197
+ inline bool IsSupportedFormatVersionForWrite(uint64_t magic, uint32_t version) {
198
+ if (magic == kBlockBasedTableMagicNumber) {
199
+ return version >= kMinSupportedBbtFormatVersionForWrite &&
200
+ version <= kLatestBbtFormatVersion;
201
+ } else if (magic == kPlainTableMagicNumber) {
202
+ return version == 0;
203
+ } else if (magic == kCuckooTableMagicNumber) {
204
+ return version == 1;
205
+ } else {
206
+ return false;
207
+ }
182
208
  }
183
209
 
184
210
  // Same as having a unique id in footer.
@@ -10,6 +10,7 @@
10
10
  #include "table/merging_iterator.h"
11
11
 
12
12
  #include "db/arena_wrapped_db_iter.h"
13
+ #include "monitoring/file_read_sample.h"
13
14
 
14
15
  namespace ROCKSDB_NAMESPACE {
15
16
  // MergingIterator uses a min/max heap to combine data from point iterators.