@nxtedition/rocksdb 15.4.0 → 15.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (402) hide show
  1. package/binding.cc +24 -19
  2. package/cache.js +1 -1
  3. package/chained-batch.js +12 -3
  4. package/deps/rocksdb/rocksdb/.clang-tidy +86 -0
  5. package/deps/rocksdb/rocksdb/BUCK +42 -0
  6. package/deps/rocksdb/rocksdb/CMakeLists.txt +11 -0
  7. package/deps/rocksdb/rocksdb/Makefile +59 -32
  8. package/deps/rocksdb/rocksdb/cache/cache.cc +0 -5
  9. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +9 -9
  10. package/deps/rocksdb/rocksdb/cache/cache_key.cc +3 -3
  11. package/deps/rocksdb/rocksdb/cache/cache_key.h +5 -5
  12. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +16 -16
  13. package/deps/rocksdb/rocksdb/cache/cache_test.cc +1 -1
  14. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +258 -294
  15. package/deps/rocksdb/rocksdb/cache/clock_cache.h +98 -49
  16. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -5
  17. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +2 -3
  18. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +18 -18
  19. package/deps/rocksdb/rocksdb/crash_test.mk +5 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -22
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +6 -1
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +14 -16
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +38 -26
  24. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
  25. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +101 -18
  26. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +12 -0
  27. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +6 -9
  28. package/deps/rocksdb/rocksdb/db/builder.cc +23 -0
  29. package/deps/rocksdb/rocksdb/db/builder.h +7 -0
  30. package/deps/rocksdb/rocksdb/db/c.cc +373 -57
  31. package/deps/rocksdb/rocksdb/db/c_test.c +101 -1
  32. package/deps/rocksdb/rocksdb/db/column_family.cc +31 -3
  33. package/deps/rocksdb/rocksdb/db/column_family_test.cc +10 -13
  34. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +35 -48
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +13 -5
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +201 -39
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -10
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +7 -7
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -455
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +19 -0
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +72 -9
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -10
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +405 -83
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +25 -1
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -10
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -0
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1410 -106
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -5
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +2 -1
  51. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +19 -10
  52. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +505 -45
  53. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
  54. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +9 -1
  55. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +4 -4
  56. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +7 -9
  57. package/deps/rocksdb/rocksdb/db/convenience.cc +4 -4
  58. package/deps/rocksdb/rocksdb/db/convenience_impl.h +2 -1
  59. package/deps/rocksdb/rocksdb/db/corruption_test.cc +60 -88
  60. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +10 -12
  61. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +471 -40
  62. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +116 -2
  63. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +5 -15
  64. package/deps/rocksdb/rocksdb/db/db_compaction_abort_test.cc +993 -0
  65. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +329 -29
  66. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +155 -13
  67. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +54 -31
  68. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -0
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +232 -70
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +57 -9
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +224 -31
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +4 -2
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +164 -8
  77. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -0
  78. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +5 -0
  79. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +47 -35
  80. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +22 -9
  81. package/deps/rocksdb/rocksdb/db/db_iter.cc +9 -0
  82. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +371 -6
  83. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -5
  84. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +22 -23
  85. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +0 -2
  86. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +4 -4
  87. package/deps/rocksdb/rocksdb/db/db_options_test.cc +40 -0
  88. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +32 -13
  89. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1 -1
  90. package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +4 -4
  91. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +68 -15
  92. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1 -1
  93. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +2 -3
  94. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -21
  95. package/deps/rocksdb/rocksdb/db/db_test.cc +644 -128
  96. package/deps/rocksdb/rocksdb/db/db_test2.cc +198 -81
  97. package/deps/rocksdb/rocksdb/db/db_test_util.cc +35 -10
  98. package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -2
  99. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +36 -32
  100. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +11 -7
  101. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +499 -0
  102. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +284 -20
  103. package/deps/rocksdb/rocksdb/db/db_write_test.cc +3 -3
  104. package/deps/rocksdb/rocksdb/db/dbformat.h +0 -5
  105. package/deps/rocksdb/rocksdb/db/error_handler.cc +24 -0
  106. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +12 -14
  107. package/deps/rocksdb/rocksdb/db/experimental.cc +13 -10
  108. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +22 -3
  110. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +21 -15
  111. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +4 -6
  112. package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -3
  113. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +5 -6
  114. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +4 -2
  115. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +17 -17
  116. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -0
  117. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  118. package/deps/rocksdb/rocksdb/db/listener_test.cc +154 -27
  119. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -6
  120. package/deps/rocksdb/rocksdb/db/memtable.cc +197 -51
  121. package/deps/rocksdb/rocksdb/db/memtable.h +6 -0
  122. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +3 -4
  123. package/deps/rocksdb/rocksdb/db/merge_test.cc +37 -35
  124. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +2 -1
  125. package/deps/rocksdb/rocksdb/db/options_file_test.cc +4 -4
  126. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +9 -11
  127. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +10 -1
  128. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +292 -15
  129. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +10 -17
  130. package/deps/rocksdb/rocksdb/db/prefix_test.cc +6 -8
  131. package/deps/rocksdb/rocksdb/db/repair.cc +10 -10
  132. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -5
  133. package/deps/rocksdb/rocksdb/db/table_cache.cc +142 -135
  134. package/deps/rocksdb/rocksdb/db/table_cache.h +30 -6
  135. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -7
  136. package/deps/rocksdb/rocksdb/db/version_builder.cc +11 -50
  137. package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
  138. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +2 -1
  139. package/deps/rocksdb/rocksdb/db/version_edit.cc +51 -2
  140. package/deps/rocksdb/rocksdb/db/version_edit.h +91 -29
  141. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -7
  142. package/deps/rocksdb/rocksdb/db/version_set.cc +211 -50
  143. package/deps/rocksdb/rocksdb/db/version_set.h +40 -3
  144. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
  145. package/deps/rocksdb/rocksdb/db/version_set_test.cc +294 -21
  146. package/deps/rocksdb/rocksdb/db/version_util.cc +96 -0
  147. package/deps/rocksdb/rocksdb/db/version_util.h +24 -0
  148. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +5 -5
  149. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +647 -31
  150. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +219 -1
  151. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +549 -12
  152. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -3
  153. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +1 -1
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -0
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +21 -4
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +32 -0
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +74 -22
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +9 -0
  159. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -61
  160. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -2
  161. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +76 -2
  162. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +92 -72
  163. package/deps/rocksdb/rocksdb/env/env.cc +1 -0
  164. package/deps/rocksdb/rocksdb/env/env_test.cc +365 -2
  165. package/deps/rocksdb/rocksdb/env/fs_posix.cc +31 -30
  166. package/deps/rocksdb/rocksdb/env/io_posix.cc +8 -11
  167. package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
  168. package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
  169. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -1
  170. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +108 -0
  171. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +32 -4
  172. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +4 -4
  173. package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
  174. package/deps/rocksdb/rocksdb/file/file_util.h +2 -1
  175. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +331 -12
  176. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +52 -35
  177. package/deps/rocksdb/rocksdb/folly.mk +22 -5
  178. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +1 -1
  179. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +100 -54
  180. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +67 -2
  181. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +149 -13
  182. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -12
  183. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +78 -97
  184. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +3 -3
  185. package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +2 -2
  186. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +5 -0
  187. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +17 -2
  188. package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +1 -1
  189. package/deps/rocksdb/rocksdb/include/rocksdb/io_dispatcher.h +358 -0
  190. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +13 -0
  191. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +43 -0
  192. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +20 -0
  193. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +63 -21
  194. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +10 -1
  195. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +1 -1
  196. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +2 -7
  197. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +13 -0
  198. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -14
  199. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +49 -9
  200. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +8 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +77 -6
  202. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +15 -0
  203. package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +16 -10
  204. package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +5 -5
  205. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +2 -4
  206. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +106 -46
  207. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +1 -1
  208. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +14 -1
  209. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +5 -1
  210. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +2 -1
  211. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -9
  212. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  213. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -2
  214. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +2 -2
  215. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +226 -8
  216. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +490 -0
  217. package/deps/rocksdb/rocksdb/memtable/skiplist.h +3 -3
  218. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +11 -0
  219. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +4 -12
  220. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +5 -5
  221. package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +21 -4
  222. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +9 -3
  223. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +21 -2
  224. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +2 -2
  225. package/deps/rocksdb/rocksdb/options/cf_options.cc +21 -1
  226. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  227. package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -2
  228. package/deps/rocksdb/rocksdb/options/db_options.cc +26 -5
  229. package/deps/rocksdb/rocksdb/options/db_options.h +3 -1
  230. package/deps/rocksdb/rocksdb/options/options.cc +5 -1
  231. package/deps/rocksdb/rocksdb/options/options_helper.cc +7 -2
  232. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +109 -103
  233. package/deps/rocksdb/rocksdb/options/options_test.cc +14 -0
  234. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +15 -17
  235. package/deps/rocksdb/rocksdb/port/lang.h +4 -0
  236. package/deps/rocksdb/rocksdb/port/port_example.h +0 -23
  237. package/deps/rocksdb/rocksdb/port/stack_trace.cc +36 -0
  238. package/deps/rocksdb/rocksdb/port/stack_trace.h +9 -0
  239. package/deps/rocksdb/rocksdb/src.mk +12 -0
  240. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +1 -2
  241. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  242. package/deps/rocksdb/rocksdb/table/block_based/block.cc +571 -292
  243. package/deps/rocksdb/rocksdb/table/block_based/block.h +143 -53
  244. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +154 -90
  245. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +5 -1
  246. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +51 -14
  247. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +0 -2
  248. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +147 -734
  249. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +30 -233
  250. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +178 -108
  251. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +13 -0
  252. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +17 -4
  253. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -2
  254. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +70 -0
  255. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +168 -24
  256. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -9
  257. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +7 -4
  258. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +9 -2
  259. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +548 -169
  260. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
  261. package/deps/rocksdb/rocksdb/table/block_based/block_util.h +156 -0
  262. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +73 -30
  263. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +74 -7
  264. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +1 -1
  265. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +20 -14
  266. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +22 -12
  267. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +1 -1
  268. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.cc +332 -0
  269. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.h +133 -0
  270. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -2
  271. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
  272. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +3 -2
  273. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +4 -1
  274. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +0 -1
  275. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +126 -46
  276. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +31 -3
  277. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +1 -2
  278. package/deps/rocksdb/rocksdb/table/cleanable_test.cc +3 -1
  279. package/deps/rocksdb/rocksdb/table/external_table.cc +25 -4
  280. package/deps/rocksdb/rocksdb/table/format.cc +27 -15
  281. package/deps/rocksdb/rocksdb/table/format.h +41 -15
  282. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -0
  283. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +22 -12
  284. package/deps/rocksdb/rocksdb/table/meta_blocks.h +0 -1
  285. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +7 -21
  286. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
  287. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +88 -13
  288. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +53 -42
  289. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +3 -12
  290. package/deps/rocksdb/rocksdb/table/table_builder.h +0 -4
  291. package/deps/rocksdb/rocksdb/table/table_properties.cc +18 -0
  292. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -3
  293. package/deps/rocksdb/rocksdb/table/table_test.cc +848 -172
  294. package/deps/rocksdb/rocksdb/table/unique_id.cc +24 -20
  295. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +8 -8
  296. package/deps/rocksdb/rocksdb/test_util/sync_point.h +5 -4
  297. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -1
  298. package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -2
  299. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +2 -1
  300. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +238 -120
  301. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +2 -2
  302. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +2 -4
  303. package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +4 -8
  304. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +1 -1
  305. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +2 -3
  306. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +82 -20
  307. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +41 -47
  308. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -0
  309. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +5 -6
  310. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +1 -1
  311. package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +6 -5
  312. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +4 -4
  313. package/deps/rocksdb/rocksdb/tools/write_stress.cc +1 -3
  314. package/deps/rocksdb/rocksdb/util/atomic.h +30 -23
  315. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +6 -7
  316. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +3 -3
  317. package/deps/rocksdb/rocksdb/util/bit_fields.h +68 -46
  318. package/deps/rocksdb/rocksdb/util/bloom_impl.h +16 -16
  319. package/deps/rocksdb/rocksdb/util/coding.h +14 -27
  320. package/deps/rocksdb/rocksdb/util/compression.cc +365 -207
  321. package/deps/rocksdb/rocksdb/util/compression.h +16 -1298
  322. package/deps/rocksdb/rocksdb/util/compression_test.cc +347 -61
  323. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +8 -9
  324. package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +1 -1
  325. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +1 -1
  326. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +3 -3
  327. package/deps/rocksdb/rocksdb/util/filter_bench.cc +18 -18
  328. package/deps/rocksdb/rocksdb/util/gflags_compat.h +3 -3
  329. package/deps/rocksdb/rocksdb/util/hash_test.cc +19 -7
  330. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.cc +1099 -0
  331. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.h +36 -0
  332. package/deps/rocksdb/rocksdb/util/io_dispatcher_test.cc +1919 -0
  333. package/deps/rocksdb/rocksdb/util/math.h +3 -1
  334. package/deps/rocksdb/rocksdb/util/mutexlock.h +19 -19
  335. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +25 -25
  336. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +5 -7
  337. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -5
  338. package/deps/rocksdb/rocksdb/util/slice.cc +0 -10
  339. package/deps/rocksdb/rocksdb/util/slice_test.cc +35 -1
  340. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +5 -7
  341. package/deps/rocksdb/rocksdb/util/status.cc +3 -1
  342. package/deps/rocksdb/rocksdb/util/stop_watch.h +2 -0
  343. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -1
  344. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +123 -78
  345. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +12 -93
  346. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -4
  347. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +0 -21
  348. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +6 -48
  349. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +94 -307
  350. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +12 -58
  351. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +2 -8
  352. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +2 -3
  353. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +205 -811
  354. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +18 -9
  355. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +2 -7
  356. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +1 -9
  357. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +17 -11
  358. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +1 -1
  359. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +1 -1
  360. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
  361. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +68 -61
  362. package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -1
  363. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +105 -59
  364. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -7
  365. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs_test.cc +94 -0
  366. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +13 -17
  367. package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +16 -3
  368. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +25 -25
  369. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +40 -40
  370. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +2 -5
  371. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +17 -19
  372. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +2 -2
  373. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +2 -2
  374. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +1 -1
  375. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +2 -2
  376. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +4 -13
  377. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +3 -3
  378. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
  379. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_seqno_test.cc +431 -0
  380. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -2
  381. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +91 -0
  382. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.cc +562 -0
  383. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.h +615 -0
  384. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.cc +2575 -0
  385. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.h +685 -0
  386. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_db_test.cc +2843 -0
  387. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.cc +567 -0
  388. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.h +275 -0
  389. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_test.cc +5183 -0
  390. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +4 -3
  391. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  392. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
  393. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +3 -3
  394. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +93 -88
  395. package/deps/rocksdb/rocksdb.gyp +7 -0
  396. package/index.js +11 -2
  397. package/iterator.js +15 -7
  398. package/package.json +1 -1
  399. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  400. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  401. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +0 -43
  402. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +0 -55
@@ -0,0 +1,567 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+
6
+ #include "utilities/trie_index/trie_index_factory.h"
7
+
8
+ #include <algorithm>
9
+ #include <cassert>
10
+ #include <cstring>
11
+
12
+ #include "db/dbformat.h"
13
+ #include "rocksdb/comparator.h"
14
+ #include "util/coding.h"
15
+
16
+ namespace ROCKSDB_NAMESPACE {
17
+ namespace trie_index {
18
+
19
+ // ============================================================================
20
+ // TrieIndexBuilder
21
+ // ============================================================================
22
+
23
+ TrieIndexBuilder::TrieIndexBuilder(const Comparator* comparator)
24
+ : comparator_(comparator),
25
+ finished_(false),
26
+ must_use_separator_with_seq_(false) {}
27
+
28
+ Slice TrieIndexBuilder::AddIndexEntry(const Slice& last_key_in_current_block,
29
+ const Slice* first_key_in_next_block,
30
+ const BlockHandle& block_handle,
31
+ std::string* separator_scratch,
32
+ const IndexEntryContext& context) {
33
+ SequenceNumber last_key_seq = context.last_key_seq;
34
+
35
+ // Compute a short separator between the two user keys using the
36
+ // comparator. FindShortestSeparator takes `*start` as both input and output:
37
+ // input: *start == last_key_in_current_block
38
+ // output: *start modified to shortest string in [start, limit)
39
+ // If first_key_in_next_block is nullptr, this is the last block — use a
40
+ // short successor of the last key.
41
+ Slice separator;
42
+ // True when last_key and first_key_in_next_block are the same user key
43
+ // (same-user-key block boundary). Computed once and reused below for
44
+ // both the sticky flag and the per-entry seqno decision.
45
+ bool same_user_key = false;
46
+ if (first_key_in_next_block != nullptr) {
47
+ same_user_key = comparator_->Compare(last_key_in_current_block,
48
+ *first_key_in_next_block) == 0;
49
+
50
+ *separator_scratch = last_key_in_current_block.ToString();
51
+ comparator_->FindShortestSeparator(separator_scratch,
52
+ *first_key_in_next_block);
53
+ separator = Slice(*separator_scratch);
54
+
55
+ // Detect same-user-key block boundary: if the two user keys are identical,
56
+ // FindShortestSeparator returns the same key for both sides, making it
57
+ // impossible to distinguish the two blocks. Set the sticky flag so that
58
+ // at Finish() time, ALL separators will include encoded seqnos.
59
+ // This mirrors ShortenedIndexBuilder::must_use_separator_with_seq_.
60
+ if (!must_use_separator_with_seq_ && same_user_key) {
61
+ must_use_separator_with_seq_ = true;
62
+ }
63
+
64
+ // Edge case: FindShortestSeparator may fail to shorten the key even when
65
+ // the user keys are different. Example: FindShortestSeparator("abc","abd")
66
+ // returns "abc" unchanged because incrementing 'c' would yield "abd" which
67
+ // is not < limit. When the resulting separator matches the previous entry's
68
+ // separator, the blocks will be grouped into the same run in Finish().
69
+ // We must mark this as a same-user-key boundary so it gets a real seqno
70
+ // rather than kMaxSequenceNumber (which would trigger the overflow block
71
+ // assertion in Finish()).
72
+ if (!same_user_key && !buffered_entries_.empty() &&
73
+ buffered_entries_.back().separator_key == *separator_scratch) {
74
+ same_user_key = true;
75
+ if (!must_use_separator_with_seq_) {
76
+ must_use_separator_with_seq_ = true;
77
+ }
78
+ }
79
+ } else {
80
+ // Last block: use the last key itself as the separator, NOT a shortened
81
+ // successor. This matches the standard ShortenedIndexBuilder behavior
82
+ // (see index_builder.h GetSeparatorWithSeq lines 278-286): it only calls
83
+ // FindShortInternalKeySuccessor when shortening_mode is
84
+ // kShortenSeparatorsAndSuccessor, which is not the default. With the
85
+ // default kShortenSeparators, the last block's separator is simply
86
+ // last_key_in_current_block.
87
+ //
88
+ // Why this matters: FindShortSuccessor can widen the key range. For
89
+ // example, if the actual last key is "9\xff\xff", FindShortSuccessor
90
+ // produces ":" (0x3A). The trie would then claim to cover keys up to
91
+ // ":", but the data block only contains keys up to "9\xff\xff". A seek
92
+ // targeting a key in that gap (e.g., "9\xff\xff\x01") would find a
93
+ // block via the trie that contains no matching data, causing iterator
94
+ // desynchronization — the trie index returns a valid block while the
95
+ // standard index correctly reports no match.
96
+ separator = last_key_in_current_block;
97
+
98
+ // Edge case: if this last block's separator matches the previous entry's
99
+ // separator, they share the same user key (same-user-key run boundary).
100
+ if (!buffered_entries_.empty() &&
101
+ comparator_->Compare(buffered_entries_.back().separator_key,
102
+ separator) == 0) {
103
+ same_user_key = true;
104
+ if (!must_use_separator_with_seq_) {
105
+ must_use_separator_with_seq_ = true;
106
+ }
107
+ }
108
+ }
109
+
110
+ // Buffer the entry for deferred trie construction in Finish().
111
+ // We buffer rather than adding to the trie immediately because the
112
+ // all-or-nothing seqno encoding decision is made at Finish() time.
113
+ TrieBlockHandle handle;
114
+ handle.offset = block_handle.offset;
115
+ handle.size = block_handle.size;
116
+
117
+ BufferedEntry entry;
118
+ entry.separator_key = separator.ToString();
119
+ // For same-user-key boundaries, use the actual seqno of the last key.
120
+ // For different-user-key boundaries, use kMaxSequenceNumber (sentinel
121
+ // meaning "this is not a same-key boundary, never advance past it").
122
+ if (same_user_key) {
123
+ entry.seqno = last_key_seq;
124
+ } else {
125
+ entry.seqno = kMaxSequenceNumber;
126
+ }
127
+ entry.handle = handle;
128
+ buffered_entries_.push_back(std::move(entry));
129
+
130
+ return separator;
131
+ }
132
+
133
+ void TrieIndexBuilder::OnKeyAdded(const Slice& /*key*/, ValueType /*type*/,
134
+ const Slice& /*value*/) {
135
+ // No-op: the trie is built from separator keys in AddIndexEntry(), not
136
+ // from individual key-value pairs.
137
+ }
138
+
139
+ Status TrieIndexBuilder::Finish(Slice* index_contents) {
140
+ if (finished_) {
141
+ return Status::InvalidArgument("TrieIndexBuilder::Finish called twice");
142
+ }
143
+ finished_ = true;
144
+
145
+ // Use seqno side-table when any same-user-key block boundary was detected.
146
+ // The must_use_separator_with_seq_ flag is set in AddIndexEntry() whenever
147
+ // the comparator finds two identical user keys at a block boundary. This
148
+ // always implies duplicate separators exist (since
149
+ // FindShortestSeparator("foo", "foo") = "foo"), so no separate scan is
150
+ // needed.
151
+ bool use_seqno = must_use_separator_with_seq_;
152
+ trie_builder_.SetHasSeqnoEncoding(use_seqno);
153
+
154
+ if (use_seqno) {
155
+ // Feed de-duplicated separators to the trie with seqno side-table metadata.
156
+ // Consecutive identical separators form a "run" — only the first occurrence
157
+ // goes into the trie (as the primary block). The remaining blocks in the
158
+ // run are stored as overflow blocks in the side-table.
159
+ //
160
+ // For non-boundary separators (different user keys), seqno is set to 0
161
+ // (sentinel = "never advance past this leaf"). kMaxSequenceNumber from
162
+ // AddIndexEntry is mapped to 0 here.
163
+ size_t i = 0;
164
+ while (i < buffered_entries_.size()) {
165
+ const auto& entry = buffered_entries_[i];
166
+
167
+ // Count how many consecutive entries share this separator key.
168
+ size_t run_start = i;
169
+ size_t run_end = i + 1;
170
+ while (run_end < buffered_entries_.size() &&
171
+ buffered_entries_[run_end].separator_key == entry.separator_key) {
172
+ run_end++;
173
+ }
174
+ uint32_t block_count = static_cast<uint32_t>(run_end - run_start);
175
+
176
+ // Map kMaxSequenceNumber (non-same-key boundary) to 0 (sentinel).
177
+ uint64_t seqno = (entry.seqno == kMaxSequenceNumber) ? 0 : entry.seqno;
178
+
179
+ // Add the primary (first) block for this separator.
180
+ trie_builder_.AddKeyWithSeqno(Slice(entry.separator_key), entry.handle,
181
+ seqno, block_count);
182
+
183
+ // Add overflow blocks (2nd, 3rd, ... in the run).
184
+ // Overflow blocks only exist within same-key runs, so their seqnos
185
+ // come from last_key_seq in AddIndexEntry (never kMaxSequenceNumber).
186
+ // The seqno may be 0 when bottommost compaction zeroes all sequence
187
+ // numbers — this is valid; see AddOverflowBlock comment.
188
+ for (size_t j = run_start + 1; j < run_end; j++) {
189
+ assert(buffered_entries_[j].seqno != kMaxSequenceNumber);
190
+ trie_builder_.AddOverflowBlock(buffered_entries_[j].handle,
191
+ buffered_entries_[j].seqno);
192
+ }
193
+
194
+ i = run_end;
195
+ }
196
+ } else {
197
+ // Common case: no same-user-key boundaries, add separators directly.
198
+ // Zero overhead — no seqno data stored.
199
+ for (const auto& entry : buffered_entries_) {
200
+ trie_builder_.AddKey(Slice(entry.separator_key), entry.handle);
201
+ }
202
+ }
203
+
204
+ // Release buffered entries — no longer needed after feeding to the trie.
205
+ buffered_entries_.clear();
206
+ buffered_entries_.shrink_to_fit();
207
+
208
+ // Always finish the trie builder, even with 0 keys — this produces a valid
209
+ // serialized trie that can be parsed by NewReader. Without this, an empty
210
+ // Slice would be returned, causing InitFromData to fail with "data too short
211
+ // for header".
212
+ trie_builder_.Finish();
213
+ *index_contents = trie_builder_.GetSerializedData();
214
+ return Status::OK();
215
+ }
216
+
217
+ // ============================================================================
218
+ // TrieIndexIterator
219
+ // ============================================================================
220
+
221
+ TrieIndexIterator::TrieIndexIterator(const LoudsTrie* trie,
222
+ const Comparator* comparator,
223
+ bool has_seqno_encoding)
224
+ : comparator_(comparator),
225
+ iter_(trie),
226
+ trie_(trie),
227
+ current_scan_idx_(0),
228
+ prepared_(false),
229
+ has_seqno_encoding_(has_seqno_encoding),
230
+ overflow_run_index_(0),
231
+ overflow_run_size_(1),
232
+ overflow_base_idx_(0) {}
233
+
234
+ void TrieIndexIterator::Prepare(const ScanOptions scan_opts[],
235
+ size_t num_opts) {
236
+ scan_opts_.clear();
237
+ scan_opts_.reserve(num_opts);
238
+ for (size_t i = 0; i < num_opts; i++) {
239
+ scan_opts_.push_back(scan_opts[i]);
240
+ }
241
+ current_scan_idx_ = 0;
242
+ prepared_ = true;
243
+ }
244
+
245
+ Status TrieIndexIterator::SeekToFirstAndGetResult(IterateResult* result) {
246
+ // Reset overflow state — SeekToFirst always lands on the primary block
247
+ // of the first trie leaf.
248
+ overflow_run_index_ = 0;
249
+ overflow_run_size_ = 1;
250
+ overflow_base_idx_ = 0;
251
+
252
+ if (!iter_.SeekToFirst()) {
253
+ result->bound_check_result = IterBoundCheck::kUnknown;
254
+ result->key = Slice();
255
+ return Status::OK();
256
+ }
257
+
258
+ result->key = iter_.Key();
259
+ current_key_scratch_ = result->key.ToString();
260
+ result->key = Slice(current_key_scratch_);
261
+
262
+ // Set up overflow state for the first leaf if seqno encoding is active.
263
+ if (has_seqno_encoding_) {
264
+ uint64_t leaf_idx = iter_.LeafIndex();
265
+ uint32_t block_count = trie_->GetLeafBlockCount(leaf_idx);
266
+ overflow_run_size_ = block_count;
267
+ overflow_base_idx_ = trie_->GetOverflowBase(leaf_idx);
268
+ }
269
+
270
+ // The very first entry is always in bounds (no target to compare against
271
+ // the limit, and the first block cannot precede any scan range).
272
+ result->bound_check_result = IterBoundCheck::kInbound;
273
+ return Status::OK();
274
+ }
275
+
276
+ Status TrieIndexIterator::SeekAndGetResult(const Slice& target,
277
+ IterateResult* result,
278
+ const SeekContext& context) {
279
+ SequenceNumber target_seq = context.target_seq;
280
+
281
+ // Advance current_scan_idx_ past any scans whose limit <= target.
282
+ // This handles the multi-scan case where the caller seeks into a later
283
+ // scan range after the previous scan returned kOutOfBound.
284
+ if (prepared_) {
285
+ while (current_scan_idx_ < scan_opts_.size()) {
286
+ const auto& opts = scan_opts_[current_scan_idx_];
287
+ if (opts.range.limit.has_value() &&
288
+ comparator_->Compare(target, opts.range.limit.value()) >= 0) {
289
+ current_scan_idx_++;
290
+ } else {
291
+ break;
292
+ }
293
+ }
294
+ }
295
+
296
+ // Reset overflow state.
297
+ overflow_run_index_ = 0;
298
+ overflow_run_size_ = 1;
299
+ overflow_base_idx_ = 0;
300
+
301
+ // Always seek with user key only — the trie stores user-key separators.
302
+ // When seqno encoding is active, post-seek correction handles the seqno.
303
+ if (!iter_.Seek(target)) {
304
+ // No leaf has a key >= target: the target is past all blocks in this SST.
305
+ // Return kUnknown (not kOutOfBound) because exhausting this SST's trie
306
+ // says nothing about the upper bound — the next SST on the level may
307
+ // still contain in-bound keys. kOutOfBound would cause LevelIterator to
308
+ // stop scanning the level prematurely.
309
+ result->bound_check_result = IterBoundCheck::kUnknown;
310
+ result->key = Slice();
311
+ return Status::OK();
312
+ }
313
+
314
+ // Set the result key (always a user key, no suffix stripping needed).
315
+ result->key = iter_.Key();
316
+ current_key_scratch_ = result->key.ToString();
317
+ result->key = Slice(current_key_scratch_);
318
+
319
+ // ---- Post-seek correction for seqno side-table ----
320
+ //
321
+ // When has_seqno_encoding_ is true, the leaf we landed on may have a
322
+ // seqno side-table entry. We use it to determine if this is the right
323
+ // block for the given (target, target_seq).
324
+ //
325
+ // The trie stores separators that are upper bounds on block contents:
326
+ // separator_key >= all keys in the block
327
+ // separator_seqno = seqno of the last key written to the block
328
+ //
329
+ // For same-user-key boundaries, the separator IS the user key. The seqno
330
+ // determines which block within a run of same-key blocks is correct:
331
+ // - If target_seq >= leaf_seqno: this is the right block (target's
332
+ // internal key <= separator's internal key, because higher seqno means
333
+ // "smaller" internal key for the same user key)
334
+ // - If target_seq < leaf_seqno: target's internal key > separator,
335
+ // so we need to advance to the next block in the run
336
+ //
337
+ // For non-boundary leaves (leaf_seqno == 0), the `leaf_seqno != 0` guard
338
+ // short-circuits before the comparison, so we never advance. This is the
339
+ // zero-overhead common path.
340
+ if (has_seqno_encoding_ && iter_.Valid()) {
341
+ uint64_t leaf_idx = iter_.LeafIndex();
342
+ uint64_t leaf_seqno = trie_->GetLeafSeqno(leaf_idx);
343
+
344
+ if (leaf_seqno != 0 && target_seq < leaf_seqno) {
345
+ // Target's internal key is AFTER the separator (lower seqno = later
346
+ // in internal key order for same user key). Advance through overflow
347
+ // blocks.
348
+ uint32_t block_count = trie_->GetLeafBlockCount(leaf_idx);
349
+ uint32_t base = trie_->GetOverflowBase(leaf_idx);
350
+
351
+ bool found = false;
352
+ for (uint32_t oi = 0; oi < block_count - 1; oi++) {
353
+ uint64_t ov_seqno = trie_->GetOverflowSeqno(base + oi);
354
+ if (ov_seqno == 0 || target_seq >= ov_seqno) {
355
+ // This overflow block is the right one.
356
+ overflow_run_index_ = oi + 1; // 1-based (0 = primary)
357
+ overflow_run_size_ = block_count;
358
+ overflow_base_idx_ = base;
359
+ found = true;
360
+ break;
361
+ }
362
+ }
363
+
364
+ if (!found) {
365
+ // target_seq is below all seqnos in this run. Advance to the next
366
+ // trie leaf (the block after the run).
367
+ if (!iter_.Next()) {
368
+ // Exhausted all blocks: target is past the end of this SST.
369
+ // Return kUnknown — see comment in Seek path above.
370
+ result->bound_check_result = IterBoundCheck::kUnknown;
371
+ result->key = Slice();
372
+ return Status::OK();
373
+ }
374
+ // Update key and overflow state for the new leaf.
375
+ result->key = iter_.Key();
376
+ current_key_scratch_ = result->key.ToString();
377
+ result->key = Slice(current_key_scratch_);
378
+ overflow_run_index_ = 0;
379
+ overflow_run_size_ = 1;
380
+ overflow_base_idx_ = 0;
381
+ // Check if the new leaf also has overflow (unlikely but possible
382
+ // with adjacent same-key runs for different user keys).
383
+ // iter_.Valid() is guaranteed here — Next() returned true above.
384
+ if (has_seqno_encoding_) {
385
+ uint64_t new_leaf = iter_.LeafIndex();
386
+ overflow_run_size_ = trie_->GetLeafBlockCount(new_leaf);
387
+ overflow_base_idx_ = trie_->GetOverflowBase(new_leaf);
388
+ }
389
+ }
390
+ } else {
391
+ // Right block (common path). Set overflow state in case this leaf
392
+ // has a run (for subsequent Next() calls).
393
+ uint32_t block_count = trie_->GetLeafBlockCount(leaf_idx);
394
+ overflow_run_index_ = 0;
395
+ overflow_run_size_ = block_count;
396
+ overflow_base_idx_ = trie_->GetOverflowBase(leaf_idx);
397
+ }
398
+ }
399
+
400
+ result->bound_check_result = CheckBounds(target);
401
+ return Status::OK();
402
+ }
403
+
404
+ Status TrieIndexIterator::NextAndGetResult(IterateResult* result) {
405
+ // Save the current separator (user key) as "previous" before advancing.
406
+ prev_key_scratch_ = current_key_scratch_;
407
+
408
+ // If we're in an overflow run and haven't exhausted it, advance within
409
+ // the run (no trie traversal needed — just increment the overflow index).
410
+ if (overflow_run_index_ + 1 < overflow_run_size_) {
411
+ overflow_run_index_++;
412
+ // The key doesn't change (same separator for all blocks in the run).
413
+ result->key = Slice(current_key_scratch_);
414
+ result->bound_check_result = CheckBounds(Slice(prev_key_scratch_));
415
+ return Status::OK();
416
+ }
417
+
418
+ // Advance to the next trie leaf.
419
+ overflow_run_index_ = 0;
420
+ overflow_run_size_ = 1;
421
+ overflow_base_idx_ = 0;
422
+
423
+ if (!iter_.Next()) {
424
+ // No more blocks: past the end of this SST.
425
+ // Return kUnknown — see comment in Seek path above.
426
+ result->bound_check_result = IterBoundCheck::kUnknown;
427
+ result->key = Slice();
428
+ return Status::OK();
429
+ }
430
+
431
+ result->key = iter_.Key();
432
+ current_key_scratch_ = result->key.ToString();
433
+ result->key = Slice(current_key_scratch_);
434
+
435
+ // Set overflow state for the new leaf.
436
+ if (has_seqno_encoding_ && iter_.Valid()) {
437
+ uint64_t leaf_idx = iter_.LeafIndex();
438
+ overflow_run_size_ = trie_->GetLeafBlockCount(leaf_idx);
439
+ overflow_base_idx_ = trie_->GetOverflowBase(leaf_idx);
440
+ }
441
+
442
+ result->bound_check_result = CheckBounds(Slice(prev_key_scratch_));
443
+ return Status::OK();
444
+ }
445
+
446
+ UserDefinedIndexBuilder::BlockHandle TrieIndexIterator::value() {
447
+ if (overflow_run_index_ == 0) {
448
+ // Primary block — use the trie leaf's handle.
449
+ auto handle = iter_.Value();
450
+ return UserDefinedIndexBuilder::BlockHandle{handle.offset, handle.size};
451
+ }
452
+ // Overflow block — use the side-table handle.
453
+ // overflow_run_index_ is 1-based, overflow array is 0-based.
454
+ uint32_t overflow_idx = overflow_base_idx_ + overflow_run_index_ - 1;
455
+ auto handle = trie_->GetOverflowHandle(overflow_idx);
456
+ return UserDefinedIndexBuilder::BlockHandle{handle.offset, handle.size};
457
+ }
458
+
459
+ IterBoundCheck TrieIndexIterator::CheckBounds(
460
+ const Slice& reference_key) const {
461
+ if (!prepared_ || scan_opts_.empty()) {
462
+ // No bounds to check — always in-bound.
463
+ return IterBoundCheck::kInbound;
464
+ }
465
+
466
+ if (current_scan_idx_ >= scan_opts_.size()) {
467
+ return IterBoundCheck::kOutOfBound;
468
+ }
469
+
470
+ const auto& opts = scan_opts_[current_scan_idx_];
471
+
472
+ // Check upper bound (limit) against the reference key, NOT the current
473
+ // separator. The trie stores separator keys (upper bounds on block
474
+ // contents), so comparing the separator against the limit would
475
+ // prematurely reject blocks that contain keys < limit.
476
+ //
477
+ // For Seek: reference_key = seek target. If target < limit, the found
478
+ // block may contain keys within bounds.
479
+ // For Next: reference_key = previous separator. If prev_sep < limit,
480
+ // the current block may contain keys within bounds.
481
+ //
482
+ // This is conservative: it may return kInbound for a block that is fully
483
+ // out of bounds. The data-level iterator handles per-key filtering.
484
+ if (opts.range.limit.has_value()) {
485
+ const Slice& limit = opts.range.limit.value();
486
+ if (comparator_->Compare(reference_key, limit) >= 0) {
487
+ return IterBoundCheck::kOutOfBound;
488
+ }
489
+ }
490
+
491
+ return IterBoundCheck::kInbound;
492
+ }
493
+
494
+ // ============================================================================
495
+ // TrieIndexReader
496
+ // ============================================================================
497
+
498
+ TrieIndexReader::TrieIndexReader(const Comparator* comparator)
499
+ : comparator_(comparator), data_size_(0) {}
500
+
501
+ Status TrieIndexReader::InitFromSlice(const Slice& data) {
502
+ data_size_ = data.size();
503
+ return trie_.InitFromData(data);
504
+ }
505
+
506
+ std::unique_ptr<UserDefinedIndexIterator> TrieIndexReader::NewIterator(
507
+ const ReadOptions& /*read_options*/) {
508
+ return std::make_unique<TrieIndexIterator>(&trie_, comparator_,
509
+ trie_.HasSeqnoEncoding());
510
+ }
511
+
512
+ size_t TrieIndexReader::ApproximateMemoryUsage() const {
513
+ // The trie uses zero-copy pointers into the serialized data for bitvectors
514
+ // and handle arrays, so the base cost is the serialized data size. On top
515
+ // of that, InitFromData() heap-allocates child position lookup tables
516
+ // (s_child_start_pos_ and s_child_end_pos_) for Select-free sparse
517
+ // traversal — 8 bytes per sparse internal node.
518
+ return data_size_ + trie_.ApproximateAuxMemoryUsage();
519
+ }
520
+
521
+ // ============================================================================
522
+ // TrieIndexFactory
523
+ // ============================================================================
524
+
525
+ Status TrieIndexFactory::NewBuilder(
526
+ const UserDefinedIndexOption& option,
527
+ std::unique_ptr<UserDefinedIndexBuilder>& builder) const {
528
+ // The trie traverses keys byte-by-byte in lexicographic order, so it
529
+ // requires a bytewise comparator. Non-bytewise comparators (e.g.,
530
+ // ReverseBytewiseComparator or custom comparators) would produce separator
531
+ // keys in a different order than the trie's byte-level traversal, causing
532
+ // incorrect Seek results.
533
+ if (option.comparator != nullptr &&
534
+ option.comparator != BytewiseComparator()) {
535
+ return Status::NotSupported(
536
+ "TrieIndexFactory requires BytewiseComparator; got: ",
537
+ option.comparator->Name());
538
+ }
539
+ // Default to BytewiseComparator when null. The trie requires a bytewise
540
+ // comparator for separator key ordering; null would cause a dereference
541
+ // crash in AddIndexEntry when comparing keys.
542
+ const Comparator* cmp =
543
+ option.comparator ? option.comparator : BytewiseComparator();
544
+ builder = std::make_unique<TrieIndexBuilder>(cmp);
545
+ return Status::OK();
546
+ }
547
+
548
+ Status TrieIndexFactory::NewReader(
549
+ const UserDefinedIndexOption& option, Slice& index_block,
550
+ std::unique_ptr<UserDefinedIndexReader>& reader) const {
551
+ const Comparator* cmp =
552
+ option.comparator ? option.comparator : BytewiseComparator();
553
+ if (cmp != BytewiseComparator()) {
554
+ return Status::NotSupported(
555
+ "TrieIndexFactory requires BytewiseComparator; got: ", cmp->Name());
556
+ }
557
+ auto trie_reader = std::make_unique<TrieIndexReader>(cmp);
558
+ Status s = trie_reader->InitFromSlice(index_block);
559
+ if (!s.ok()) {
560
+ return s;
561
+ }
562
+ reader = std::move(trie_reader);
563
+ return Status::OK();
564
+ }
565
+
566
+ } // namespace trie_index
567
+ } // namespace ROCKSDB_NAMESPACE