@nxtedition/rocksdb 9.0.0 → 10.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (307)
  1. package/binding.cc +244 -177
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +13 -9
  3. package/deps/rocksdb/rocksdb/Makefile +15 -6
  4. package/deps/rocksdb/rocksdb/README.md +29 -0
  5. package/deps/rocksdb/rocksdb/TARGETS +17 -2
  6. package/deps/rocksdb/rocksdb/cache/cache.cc +35 -0
  7. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +74 -15
  8. package/deps/rocksdb/rocksdb/cache/cache_helpers.cc +2 -1
  9. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +4 -3
  10. package/deps/rocksdb/rocksdb/cache/cache_test.cc +16 -4
  11. package/deps/rocksdb/rocksdb/cache/charged_cache.cc +4 -2
  12. package/deps/rocksdb/rocksdb/cache/charged_cache.h +5 -3
  13. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +2024 -14
  14. package/deps/rocksdb/rocksdb/cache/clock_cache.h +349 -23
  15. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +126 -51
  16. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +9 -0
  17. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +202 -7
  18. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +31 -14
  19. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +0 -33
  20. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +314 -25
  21. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +29 -4
  22. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +10 -0
  23. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +8 -3
  24. package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache.cc +119 -0
  25. package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache.h +155 -0
  26. package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache_test.cc +711 -0
  27. package/deps/rocksdb/rocksdb/cache/typed_cache.h +17 -11
  28. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +25 -11
  29. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +1 -0
  30. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +2 -1
  31. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +2 -1
  32. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +8 -0
  33. package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +7 -3
  34. package/deps/rocksdb/rocksdb/db/builder.cc +3 -3
  35. package/deps/rocksdb/rocksdb/db/c.cc +64 -0
  36. package/deps/rocksdb/rocksdb/db/c_test.c +36 -0
  37. package/deps/rocksdb/rocksdb/db/column_family.cc +23 -15
  38. package/deps/rocksdb/rocksdb/db/column_family.h +9 -0
  39. package/deps/rocksdb/rocksdb/db/column_family_test.cc +101 -5
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +36 -23
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +24 -10
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +3 -5
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +42 -18
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +7 -3
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +8 -6
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +1 -1
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +3 -0
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +61 -0
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +146 -64
  51. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +13 -39
  52. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +1 -0
  53. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +29 -7
  54. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +8 -3
  55. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +59 -0
  56. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +27 -3
  57. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +186 -2
  58. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +1 -0
  59. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +17 -5
  60. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +519 -240
  61. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +104 -43
  62. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +169 -66
  63. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +2 -1
  64. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +12 -4
  65. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +50 -14
  66. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +85 -53
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +3 -7
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +99 -82
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +4 -14
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +24 -21
  71. package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +6 -0
  72. package/deps/rocksdb/rocksdb/db/db_iter.cc +83 -55
  73. package/deps/rocksdb/rocksdb/db/db_iter.h +10 -2
  74. package/deps/rocksdb/rocksdb/db/db_iter_test.cc +29 -0
  75. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +276 -21
  76. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +35 -0
  77. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +187 -1
  78. package/deps/rocksdb/rocksdb/db/db_options_test.cc +258 -0
  79. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +258 -0
  80. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +1 -0
  81. package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +52 -0
  82. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +74 -1
  83. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +22 -4
  84. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +3 -1
  85. package/deps/rocksdb/rocksdb/db/db_test.cc +134 -30
  86. package/deps/rocksdb/rocksdb/db/db_test2.cc +3 -0
  87. package/deps/rocksdb/rocksdb/db/db_test_util.cc +11 -6
  88. package/deps/rocksdb/rocksdb/db/db_test_util.h +5 -2
  89. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +1 -0
  90. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +12 -0
  91. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +337 -1
  92. package/deps/rocksdb/rocksdb/db/deletefile_test.cc +2 -0
  93. package/deps/rocksdb/rocksdb/db/error_handler.cc +51 -34
  94. package/deps/rocksdb/rocksdb/db/error_handler.h +7 -6
  95. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +58 -0
  96. package/deps/rocksdb/rocksdb/db/flush_job.cc +17 -19
  97. package/deps/rocksdb/rocksdb/db/flush_job.h +3 -3
  98. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +2 -1
  99. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +2 -0
  100. package/deps/rocksdb/rocksdb/db/memtable.cc +18 -70
  101. package/deps/rocksdb/rocksdb/db/memtable_list.cc +1 -1
  102. package/deps/rocksdb/rocksdb/db/memtable_list.h +11 -1
  103. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +1 -1
  104. package/deps/rocksdb/rocksdb/db/merge_helper.cc +330 -115
  105. package/deps/rocksdb/rocksdb/db/merge_helper.h +100 -12
  106. package/deps/rocksdb/rocksdb/db/merge_operator.cc +82 -0
  107. package/deps/rocksdb/rocksdb/db/merge_test.cc +267 -0
  108. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +3 -0
  109. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.h +4 -4
  110. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +2 -0
  111. package/deps/rocksdb/rocksdb/db/prefix_test.cc +1 -0
  112. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +4 -0
  113. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +4 -0
  114. package/deps/rocksdb/rocksdb/db/repair.cc +4 -3
  115. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +454 -70
  116. package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.cc +105 -69
  117. package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.h +83 -46
  118. package/deps/rocksdb/rocksdb/db/table_cache.cc +32 -19
  119. package/deps/rocksdb/rocksdb/db/table_cache.h +12 -6
  120. package/deps/rocksdb/rocksdb/db/version_edit.h +10 -4
  121. package/deps/rocksdb/rocksdb/db/version_set.cc +75 -73
  122. package/deps/rocksdb/rocksdb/db/version_set.h +8 -8
  123. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +2 -5
  124. package/deps/rocksdb/rocksdb/db/version_set_test.cc +22 -11
  125. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +525 -0
  126. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +6 -22
  127. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +0 -20
  128. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +0 -29
  129. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +46 -0
  130. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.h +40 -0
  131. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper_test.cc +39 -0
  132. package/deps/rocksdb/rocksdb/db/write_batch.cc +44 -20
  133. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
  134. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +4 -4
  135. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +4 -7
  136. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +88 -10
  137. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +15 -10
  138. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +108 -58
  139. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +36 -14
  140. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +34 -0
  141. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +1 -1
  142. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +195 -130
  143. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +4 -2
  144. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +12 -12
  145. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_wide_merge_operator.cc +51 -0
  146. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_wide_merge_operator.h +27 -0
  147. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +3 -6
  148. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +14 -11
  149. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +44 -38
  150. package/deps/rocksdb/rocksdb/env/env.cc +5 -0
  151. package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +1 -0
  152. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +50 -29
  153. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +32 -2
  154. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +513 -30
  155. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +8 -0
  156. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +38 -13
  157. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +14 -7
  158. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +42 -0
  159. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +65 -12
  160. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +11 -0
  161. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +26 -0
  162. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +37 -4
  163. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +2 -0
  164. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -0
  165. package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +8 -3
  166. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +10 -4
  167. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +4 -0
  168. package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +1 -1
  169. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +55 -4
  170. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +45 -5
  171. package/deps/rocksdb/rocksdb/include/rocksdb/port_defs.h +4 -0
  172. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +9 -0
  173. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +79 -8
  174. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +16 -0
  175. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +35 -0
  176. package/deps/rocksdb/rocksdb/include/rocksdb/system_clock.h +15 -0
  177. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +14 -3
  178. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +2 -0
  179. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +7 -0
  180. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +6 -1
  181. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +2 -1
  182. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +9 -0
  183. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  184. package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +53 -2
  185. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +0 -2
  186. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +2 -2
  187. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +1 -1
  188. package/deps/rocksdb/rocksdb/microbench/README.md +60 -0
  189. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +1 -1
  190. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +6 -0
  191. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +18 -7
  192. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +4 -0
  193. package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -0
  194. package/deps/rocksdb/rocksdb/options/db_options.cc +47 -2
  195. package/deps/rocksdb/rocksdb/options/db_options.h +3 -0
  196. package/deps/rocksdb/rocksdb/options/options_helper.cc +12 -0
  197. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +3 -1
  198. package/deps/rocksdb/rocksdb/options/options_test.cc +6 -1
  199. package/deps/rocksdb/rocksdb/plugin/README.md +43 -0
  200. package/deps/rocksdb/rocksdb/port/README +10 -0
  201. package/deps/rocksdb/rocksdb/port/port_example.h +1 -1
  202. package/deps/rocksdb/rocksdb/port/port_posix.cc +1 -1
  203. package/deps/rocksdb/rocksdb/port/port_posix.h +7 -4
  204. package/deps/rocksdb/rocksdb/port/stack_trace.cc +5 -0
  205. package/deps/rocksdb/rocksdb/port/win/port_win.h +5 -2
  206. package/deps/rocksdb/rocksdb/src.mk +7 -1
  207. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1 -1
  208. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +3 -1
  209. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +275 -61
  210. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +96 -4
  211. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +179 -62
  212. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +35 -22
  213. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +12 -8
  214. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +14 -9
  215. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +3 -1
  216. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +26 -7
  217. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +15 -12
  218. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +10 -5
  219. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +39 -18
  220. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -6
  221. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +44 -26
  222. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +2 -1
  223. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
  224. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +10 -8
  225. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -2
  226. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +3 -2
  227. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +1 -1
  228. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +3 -2
  229. package/deps/rocksdb/rocksdb/table/block_fetcher.h +4 -0
  230. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +6 -2
  231. package/deps/rocksdb/rocksdb/table/get_context.cc +52 -89
  232. package/deps/rocksdb/rocksdb/table/get_context.h +12 -3
  233. package/deps/rocksdb/rocksdb/table/internal_iterator.h +11 -0
  234. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +29 -1
  235. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +12 -0
  236. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +33 -6
  237. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +1 -0
  238. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +2 -4
  239. package/deps/rocksdb/rocksdb/table/table_reader.h +6 -0
  240. package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +31 -0
  241. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.cc +2 -1
  242. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +3 -3
  243. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +26 -43
  244. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +213 -28
  245. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +36 -0
  246. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +0 -1
  247. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +33 -10
  248. package/deps/rocksdb/rocksdb/util/bloom_test.cc +32 -11
  249. package/deps/rocksdb/rocksdb/util/cast_util.h +10 -0
  250. package/deps/rocksdb/rocksdb/util/comparator.cc +26 -1
  251. package/deps/rocksdb/rocksdb/util/compression.h +9 -3
  252. package/deps/rocksdb/rocksdb/util/crc32c.cc +7 -1
  253. package/deps/rocksdb/rocksdb/util/distributed_mutex.h +1 -1
  254. package/deps/rocksdb/rocksdb/util/overload.h +23 -0
  255. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +53 -18
  256. package/deps/rocksdb/rocksdb/util/rate_limiter_impl.h +6 -1
  257. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +90 -19
  258. package/deps/rocksdb/rocksdb/util/slice_test.cc +30 -0
  259. package/deps/rocksdb/rocksdb/util/status.cc +1 -0
  260. package/deps/rocksdb/rocksdb/util/string_util.cc +39 -0
  261. package/deps/rocksdb/rocksdb/util/string_util.h +10 -0
  262. package/deps/rocksdb/rocksdb/util/thread_operation.h +2 -0
  263. package/deps/rocksdb/rocksdb/util/udt_util.cc +42 -0
  264. package/deps/rocksdb/rocksdb/util/udt_util.h +19 -0
  265. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +14 -0
  266. package/deps/rocksdb/rocksdb/util/xxhash.h +0 -3
  267. package/deps/rocksdb/rocksdb/util/xxph3.h +0 -4
  268. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +2 -1
  269. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +1 -0
  270. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +19 -15
  271. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +11 -7
  272. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +5 -0
  273. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +3 -0
  274. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +9 -0
  275. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +7 -4
  276. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +13 -0
  277. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +41 -0
  278. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +15 -9
  279. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +4 -0
  280. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +155 -0
  281. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
  282. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +81 -1
  283. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +2 -6
  284. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +7 -5
  285. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +2 -1
  286. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +3 -2
  287. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +57 -27
  288. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +127 -120
  289. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +129 -59
  290. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +105 -8
  291. package/deps/rocksdb/rocksdb.gyp +4 -2
  292. package/index.js +38 -55
  293. package/package.json +4 -4
  294. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  295. package/util.h +7 -1
  296. package/deps/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake +0 -7
  297. package/deps/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake +0 -29
  298. package/deps/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake +0 -29
  299. package/deps/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake +0 -29
  300. package/deps/rocksdb/rocksdb/cmake/modules/FindTBB.cmake +0 -33
  301. package/deps/rocksdb/rocksdb/cmake/modules/Findgflags.cmake +0 -29
  302. package/deps/rocksdb/rocksdb/cmake/modules/Findlz4.cmake +0 -29
  303. package/deps/rocksdb/rocksdb/cmake/modules/Finduring.cmake +0 -26
  304. package/deps/rocksdb/rocksdb/cmake/modules/Findzstd.cmake +0 -29
  305. package/deps/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake +0 -10
  306. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  307. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -88,19 +88,23 @@ CacheAllocationPtr CopyBufferToHeap(MemoryAllocator* allocator, Slice& buf) {
88
88
  // Explicitly instantiate templates for each "blocklike" type we use (and
89
89
  // before implicit specialization).
90
90
  // This makes it possible to keep the template definitions in the .cc file.
91
- #define INSTANTIATE_BLOCKLIKE_TEMPLATES(T) \
92
- template Status BlockBasedTable::RetrieveBlock<T>( \
93
- FilePrefetchBuffer * prefetch_buffer, const ReadOptions& ro, \
94
- const BlockHandle& handle, const UncompressionDict& uncompression_dict, \
95
- CachableEntry<T>* out_parsed_block, GetContext* get_context, \
96
- BlockCacheLookupContext* lookup_context, bool for_compaction, \
97
- bool use_cache, bool async_read) const; \
98
- template Status BlockBasedTable::MaybeReadBlockAndLoadToCache<T>( \
99
- FilePrefetchBuffer * prefetch_buffer, const ReadOptions& ro, \
100
- const BlockHandle& handle, const UncompressionDict& uncompression_dict, \
101
- bool for_compaction, CachableEntry<T>* block_entry, \
102
- GetContext* get_context, BlockCacheLookupContext* lookup_context, \
103
- BlockContents* contents, bool async_read) const;
91
+ #define INSTANTIATE_BLOCKLIKE_TEMPLATES(T) \
92
+ template Status BlockBasedTable::RetrieveBlock<T>( \
93
+ FilePrefetchBuffer * prefetch_buffer, const ReadOptions& ro, \
94
+ const BlockHandle& handle, const UncompressionDict& uncompression_dict, \
95
+ CachableEntry<T>* out_parsed_block, GetContext* get_context, \
96
+ BlockCacheLookupContext* lookup_context, bool for_compaction, \
97
+ bool use_cache, bool async_read, bool use_block_cache_for_lookup) const; \
98
+ template Status BlockBasedTable::MaybeReadBlockAndLoadToCache<T>( \
99
+ FilePrefetchBuffer * prefetch_buffer, const ReadOptions& ro, \
100
+ const BlockHandle& handle, const UncompressionDict& uncompression_dict, \
101
+ bool for_compaction, CachableEntry<T>* block_entry, \
102
+ GetContext* get_context, BlockCacheLookupContext* lookup_context, \
103
+ BlockContents* contents, bool async_read, \
104
+ bool use_block_cache_for_lookup) const; \
105
+ template Status BlockBasedTable::LookupAndPinBlocksInCache<T>( \
106
+ const ReadOptions& ro, const BlockHandle& handle, \
107
+ CachableEntry<T>* out_parsed_block) const;
104
108
 
105
109
  INSTANTIATE_BLOCKLIKE_TEMPLATES(ParsedFullFilterBlock);
106
110
  INSTANTIATE_BLOCKLIKE_TEMPLATES(UncompressionDict);
@@ -682,7 +686,7 @@ Status BlockBasedTable::Open(
682
686
  rep->table_properties->compression_name ==
683
687
  CompressionTypeToString(kZSTDNotFinalCompression));
684
688
  rep->create_context = BlockCreateContext(
685
- &rep->table_options, rep->ioptions.stats,
689
+ &rep->table_options, &rep->ioptions, rep->ioptions.stats,
686
690
  blocks_definitely_zstd_compressed, block_protection_bytes_per_key,
687
691
  rep->internal_comparator.user_comparator(), rep->index_value_is_full,
688
692
  rep->index_has_first_key);
@@ -884,6 +888,7 @@ Status BlockBasedTable::PrefetchTail(
884
888
  true /* track_min_offset */, false /* implicit_auto_readahead */,
885
889
  0 /* num_file_reads */, 0 /* num_file_reads_for_auto_readahead */,
886
890
  0 /* upper_bound_offset */, nullptr /* fs */, nullptr /* clock */, stats,
891
+ /* readahead_cb */ nullptr,
887
892
  FilePrefetchBufferUsage::kTableOpenPrefetchTail));
888
893
 
889
894
  if (s.ok()) {
@@ -994,7 +999,8 @@ Status BlockBasedTable::ReadRangeDelBlock(
994
999
  read_options, range_del_handle,
995
1000
  /*input_iter=*/nullptr, BlockType::kRangeDeletion,
996
1001
  /*get_context=*/nullptr, lookup_context, prefetch_buffer,
997
- /*for_compaction= */ false, /*async_read= */ false, tmp_status));
1002
+ /*for_compaction= */ false, /*async_read= */ false, tmp_status,
1003
+ /*use_block_cache_for_lookup=*/true));
998
1004
  assert(iter != nullptr);
999
1005
  s = iter->status();
1000
1006
  if (!s.ok()) {
@@ -1301,8 +1307,8 @@ Cache::Priority BlockBasedTable::GetCachePriority() const {
1301
1307
  template <typename TBlocklike>
1302
1308
  WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::GetDataBlockFromCache(
1303
1309
  const Slice& cache_key, BlockCacheInterface<TBlocklike> block_cache,
1304
- CachableEntry<TBlocklike>* out_parsed_block,
1305
- GetContext* get_context) const {
1310
+ CachableEntry<TBlocklike>* out_parsed_block, GetContext* get_context,
1311
+ const UncompressionDict* dict) const {
1306
1312
  assert(out_parsed_block);
1307
1313
  assert(out_parsed_block->IsEmpty());
1308
1314
 
@@ -1311,10 +1317,12 @@ WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::GetDataBlockFromCache(
1311
1317
 
1312
1318
  // Lookup uncompressed cache first
1313
1319
  if (block_cache) {
1320
+ BlockCreateContext create_ctx = rep_->create_context;
1321
+ create_ctx.dict = dict;
1314
1322
  assert(!cache_key.empty());
1315
1323
  auto cache_handle = block_cache.LookupFull(
1316
- cache_key, &rep_->create_context, GetCachePriority<TBlocklike>(),
1317
- statistics, rep_->ioptions.lowest_used_cache_tier);
1324
+ cache_key, &create_ctx, GetCachePriority<TBlocklike>(), statistics,
1325
+ rep_->ioptions.lowest_used_cache_tier);
1318
1326
 
1319
1327
  // Avoid updating metrics here if the handle is not complete yet. This
1320
1328
  // happens with MultiGet and secondary cache. So update the metrics only
@@ -1341,8 +1349,9 @@ WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::GetDataBlockFromCache(
1341
1349
  template <typename TBlocklike>
1342
1350
  WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::PutDataBlockToCache(
1343
1351
  const Slice& cache_key, BlockCacheInterface<TBlocklike> block_cache,
1344
- CachableEntry<TBlocklike>* out_parsed_block, BlockContents&& block_contents,
1345
- CompressionType block_comp_type,
1352
+ CachableEntry<TBlocklike>* out_parsed_block,
1353
+ BlockContents&& uncompressed_block_contents,
1354
+ BlockContents&& compressed_block_contents, CompressionType block_comp_type,
1346
1355
  const UncompressionDict& uncompression_dict,
1347
1356
  MemoryAllocator* memory_allocator, GetContext* get_context) const {
1348
1357
  const ImmutableOptions& ioptions = rep_->ioptions;
@@ -1354,23 +1363,22 @@ WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::PutDataBlockToCache(
1354
1363
  Statistics* statistics = ioptions.stats;
1355
1364
 
1356
1365
  std::unique_ptr<TBlocklike> block_holder;
1357
- if (block_comp_type != kNoCompression) {
1366
+ if (block_comp_type != kNoCompression &&
1367
+ uncompressed_block_contents.data.empty()) {
1368
+ assert(compressed_block_contents.data.data());
1358
1369
  // Retrieve the uncompressed contents into a new buffer
1359
- BlockContents uncompressed_block_contents;
1360
1370
  UncompressionContext context(block_comp_type);
1361
1371
  UncompressionInfo info(context, uncompression_dict, block_comp_type);
1362
- s = UncompressBlockData(info, block_contents.data.data(),
1363
- block_contents.data.size(),
1372
+ s = UncompressBlockData(info, compressed_block_contents.data.data(),
1373
+ compressed_block_contents.data.size(),
1364
1374
  &uncompressed_block_contents, format_version,
1365
1375
  ioptions, memory_allocator);
1366
1376
  if (!s.ok()) {
1367
1377
  return s;
1368
1378
  }
1369
- rep_->create_context.Create(&block_holder,
1370
- std::move(uncompressed_block_contents));
1371
- } else {
1372
- rep_->create_context.Create(&block_holder, std::move(block_contents));
1373
1379
  }
1380
+ rep_->create_context.Create(&block_holder,
1381
+ std::move(uncompressed_block_contents));
1374
1382
 
1375
1383
  // insert into uncompressed block cache
1376
1384
  if (block_cache && block_holder->own_bytes()) {
@@ -1378,7 +1386,8 @@ WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::PutDataBlockToCache(
1378
1386
  BlockCacheTypedHandle<TBlocklike>* cache_handle = nullptr;
1379
1387
  s = block_cache.InsertFull(cache_key, block_holder.get(), charge,
1380
1388
  &cache_handle, GetCachePriority<TBlocklike>(),
1381
- rep_->ioptions.lowest_used_cache_tier);
1389
+ rep_->ioptions.lowest_used_cache_tier,
1390
+ compressed_block_contents.data, block_comp_type);
1382
1391
 
1383
1392
  if (s.ok()) {
1384
1393
  assert(cache_handle != nullptr);
@@ -1465,6 +1474,62 @@ IndexBlockIter* BlockBasedTable::InitBlockIterator<IndexBlockIter>(
1465
1474
  block_contents_pinned, rep->user_defined_timestamps_persisted);
1466
1475
  }
1467
1476
 
1477
+ // Right now only called for Data blocks.
1478
+ template <typename TBlocklike>
1479
+ Status BlockBasedTable::LookupAndPinBlocksInCache(
1480
+ const ReadOptions& ro, const BlockHandle& handle,
1481
+ CachableEntry<TBlocklike>* out_parsed_block) const {
1482
+ BlockCacheInterface<TBlocklike> block_cache{
1483
+ rep_->table_options.block_cache.get()};
1484
+
1485
+ assert(block_cache);
1486
+
1487
+ Status s;
1488
+ CachableEntry<UncompressionDict> uncompression_dict;
1489
+ if (rep_->uncompression_dict_reader) {
1490
+ const bool no_io = (ro.read_tier == kBlockCacheTier);
1491
+ s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
1492
+ /* prefetch_buffer= */ nullptr, ro, no_io, ro.verify_checksums,
1493
+ /* get_context= */ nullptr, /* lookup_context= */ nullptr,
1494
+ &uncompression_dict);
1495
+ if (!s.ok()) {
1496
+ return s;
1497
+ }
1498
+ }
1499
+
1500
+ // Do the lookup.
1501
+ CacheKey key_data = GetCacheKey(rep_->base_cache_key, handle);
1502
+ const Slice key = key_data.AsSlice();
1503
+
1504
+ Statistics* statistics = rep_->ioptions.statistics.get();
1505
+
1506
+ BlockCreateContext create_ctx = rep_->create_context;
1507
+ create_ctx.dict = uncompression_dict.GetValue()
1508
+ ? uncompression_dict.GetValue()
1509
+ : &UncompressionDict::GetEmptyDict();
1510
+
1511
+ auto cache_handle =
1512
+ block_cache.LookupFull(key, &create_ctx, GetCachePriority<TBlocklike>(),
1513
+ statistics, rep_->ioptions.lowest_used_cache_tier);
1514
+
1515
+ if (!cache_handle) {
1516
+ UpdateCacheMissMetrics(TBlocklike::kBlockType, /* get_context = */ nullptr);
1517
+ return s;
1518
+ }
1519
+
1520
+ // Found in Cache.
1521
+ TBlocklike* value = block_cache.Value(cache_handle);
1522
+ if (value) {
1523
+ UpdateCacheHitMetrics(TBlocklike::kBlockType, /* get_context = */ nullptr,
1524
+ block_cache.get()->GetUsage(cache_handle));
1525
+ }
1526
+ out_parsed_block->SetCachedValue(value, block_cache.get(), cache_handle);
1527
+
1528
+ assert(!out_parsed_block->IsEmpty());
1529
+
1530
+ return s;
1531
+ }
1532
+
1468
1533
  // If contents is nullptr, this function looks up the block caches for the
1469
1534
  // data block referenced by handle, and read the block from disk if necessary.
1470
1535
  // If contents is non-null, it skips the cache lookup and disk read, since
@@ -1477,12 +1542,12 @@ BlockBasedTable::MaybeReadBlockAndLoadToCache(
1477
1542
  const BlockHandle& handle, const UncompressionDict& uncompression_dict,
1478
1543
  bool for_compaction, CachableEntry<TBlocklike>* out_parsed_block,
1479
1544
  GetContext* get_context, BlockCacheLookupContext* lookup_context,
1480
- BlockContents* contents, bool async_read) const {
1545
+ BlockContents* contents, bool async_read,
1546
+ bool use_block_cache_for_lookup) const {
1481
1547
  assert(out_parsed_block != nullptr);
1482
1548
  const bool no_io = (ro.read_tier == kBlockCacheTier);
1483
1549
  BlockCacheInterface<TBlocklike> block_cache{
1484
1550
  rep_->table_options.block_cache.get()};
1485
-
1486
1551
  // First, try to get the block from the cache
1487
1552
  //
1488
1553
  // If either block cache is enabled, we'll try to read from it.
@@ -1496,21 +1561,25 @@ BlockBasedTable::MaybeReadBlockAndLoadToCache(
1496
1561
  key = key_data.AsSlice();
1497
1562
 
1498
1563
  if (!contents) {
1499
- s = GetDataBlockFromCache(key, block_cache, out_parsed_block,
1500
- get_context);
1501
- // Value could still be null at this point, so check the cache handle
1502
- // and update the read pattern for prefetching
1503
- if (out_parsed_block->GetValue() || out_parsed_block->GetCacheHandle()) {
1504
- // TODO(haoyu): Differentiate cache hit on uncompressed block cache and
1505
- // compressed block cache.
1506
- is_cache_hit = true;
1507
- if (prefetch_buffer) {
1508
- // Update the block details so that PrefetchBuffer can use the read
1509
- // pattern to determine if reads are sequential or not for
1510
- // prefetching. It should also take in account blocks read from cache.
1511
- prefetch_buffer->UpdateReadPattern(
1512
- handle.offset(), BlockSizeWithTrailer(handle),
1513
- ro.adaptive_readahead /*decrease_readahead_size*/);
1564
+ if (use_block_cache_for_lookup) {
1565
+ s = GetDataBlockFromCache(key, block_cache, out_parsed_block,
1566
+ get_context, &uncompression_dict);
1567
+ // Value could still be null at this point, so check the cache handle
1568
+ // and update the read pattern for prefetching
1569
+ if (out_parsed_block->GetValue() ||
1570
+ out_parsed_block->GetCacheHandle()) {
1571
+ // TODO(haoyu): Differentiate cache hit on uncompressed block cache
1572
+ // and compressed block cache.
1573
+ is_cache_hit = true;
1574
+ if (prefetch_buffer) {
1575
+ // Update the block details so that PrefetchBuffer can use the read
1576
+ // pattern to determine if reads are sequential or not for
1577
+ // prefetching. It should also take in account blocks read from
1578
+ // cache.
1579
+ prefetch_buffer->UpdateReadPattern(
1580
+ handle.offset(), BlockSizeWithTrailer(handle),
1581
+ ro.adaptive_readahead /*decrease_readahead_size*/);
1582
+ }
1514
1583
  }
1515
1584
  }
1516
1585
  }
@@ -1525,14 +1594,26 @@ BlockBasedTable::MaybeReadBlockAndLoadToCache(
1525
1594
  TBlocklike::kBlockType != BlockType::kFilter &&
1526
1595
  TBlocklike::kBlockType != BlockType::kCompressionDictionary &&
1527
1596
  rep_->blocks_maybe_compressed;
1597
+ // This flag, if true, tells BlockFetcher to return the uncompressed
1598
+ // block when ReadBlockContents() is called.
1528
1599
  const bool do_uncompress = maybe_compressed;
1529
1600
  CompressionType contents_comp_type;
1530
1601
  // Maybe serialized or uncompressed
1531
1602
  BlockContents tmp_contents;
1603
+ BlockContents uncomp_contents;
1604
+ BlockContents comp_contents;
1532
1605
  if (!contents) {
1533
1606
  Histograms histogram = for_compaction ? READ_BLOCK_COMPACTION_MICROS
1534
1607
  : READ_BLOCK_GET_MICROS;
1535
1608
  StopWatch sw(rep_->ioptions.clock, statistics, histogram);
1609
+ // Setting do_uncompress to false may cause an extra mempcy in the
1610
+ // following cases -
1611
+ // 1. Compression is enabled, but block is not actually compressed
1612
+ // 2. Compressed block is in the prefetch buffer
1613
+ // 3. Direct IO
1614
+ //
1615
+ // It would also cause a memory allocation to be used rather than
1616
+ // stack if the compressed block size is < 5KB
1536
1617
  BlockFetcher block_fetcher(
1537
1618
  rep_->file.get(), prefetch_buffer, rep_->footer, ro, handle,
1538
1619
  &tmp_contents, rep_->ioptions, do_uncompress, maybe_compressed,
@@ -1553,7 +1634,6 @@ BlockBasedTable::MaybeReadBlockAndLoadToCache(
1553
1634
  }
1554
1635
 
1555
1636
  contents_comp_type = block_fetcher.get_compression_type();
1556
- contents = &tmp_contents;
1557
1637
  if (get_context) {
1558
1638
  switch (TBlocklike::kBlockType) {
1559
1639
  case BlockType::kIndex:
@@ -1567,17 +1647,43 @@ BlockBasedTable::MaybeReadBlockAndLoadToCache(
1567
1647
  break;
1568
1648
  }
1569
1649
  }
1650
+ if (s.ok()) {
1651
+ if (do_uncompress && contents_comp_type != kNoCompression) {
1652
+ comp_contents = BlockContents(block_fetcher.GetCompressedBlock());
1653
+ uncomp_contents = std::move(tmp_contents);
1654
+ } else if (contents_comp_type != kNoCompression) {
1655
+ // do_uncompress must be false, so output of BlockFetcher is
1656
+ // compressed
1657
+ comp_contents = std::move(tmp_contents);
1658
+ } else {
1659
+ uncomp_contents = std::move(tmp_contents);
1660
+ }
1661
+
1662
+ // If filling cache is allowed and a cache is configured, try to put
1663
+ // the block to the cache. Do this here while block_fetcher is in
1664
+ // scope, since comp_contents will be a reference to the compressed
1665
+ // block in block_fetcher
1666
+ s = PutDataBlockToCache(
1667
+ key, block_cache, out_parsed_block, std::move(uncomp_contents),
1668
+ std::move(comp_contents), contents_comp_type, uncompression_dict,
1669
+ GetMemoryAllocator(rep_->table_options), get_context);
1670
+ }
1570
1671
  } else {
1571
1672
  contents_comp_type = GetBlockCompressionType(*contents);
1572
- }
1673
+ if (contents_comp_type != kNoCompression) {
1674
+ comp_contents = std::move(*contents);
1675
+ } else {
1676
+ uncomp_contents = std::move(*contents);
1677
+ }
1573
1678
 
1574
- if (s.ok()) {
1575
- // If filling cache is allowed and a cache is configured, try to put the
1576
- // block to the cache.
1577
- s = PutDataBlockToCache(
1578
- key, block_cache, out_parsed_block, std::move(*contents),
1579
- contents_comp_type, uncompression_dict,
1580
- GetMemoryAllocator(rep_->table_options), get_context);
1679
+ if (s.ok()) {
1680
+ // If filling cache is allowed and a cache is configured, try to put
1681
+ // the block to the cache.
1682
+ s = PutDataBlockToCache(
1683
+ key, block_cache, out_parsed_block, std::move(uncomp_contents),
1684
+ std::move(comp_contents), contents_comp_type, uncompression_dict,
1685
+ GetMemoryAllocator(rep_->table_options), get_context);
1686
+ }
1581
1687
  }
1582
1688
  }
1583
1689
  }
@@ -1693,7 +1799,7 @@ WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::RetrieveBlock(
1693
1799
  const BlockHandle& handle, const UncompressionDict& uncompression_dict,
1694
1800
  CachableEntry<TBlocklike>* out_parsed_block, GetContext* get_context,
1695
1801
  BlockCacheLookupContext* lookup_context, bool for_compaction,
1696
- bool use_cache, bool async_read) const {
1802
+ bool use_cache, bool async_read, bool use_block_cache_for_lookup) const {
1697
1803
  assert(out_parsed_block);
1698
1804
  assert(out_parsed_block->IsEmpty());
1699
1805
 
@@ -1702,7 +1808,7 @@ WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::RetrieveBlock(
1702
1808
  s = MaybeReadBlockAndLoadToCache(
1703
1809
  prefetch_buffer, ro, handle, uncompression_dict, for_compaction,
1704
1810
  out_parsed_block, get_context, lookup_context,
1705
- /*contents=*/nullptr, async_read);
1811
+ /*contents=*/nullptr, async_read, use_block_cache_for_lookup);
1706
1812
 
1707
1813
  if (!s.ok()) {
1708
1814
  return s;
@@ -1913,6 +2019,16 @@ FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator(
1913
2019
  snapshot, read_options.timestamp);
1914
2020
  }
1915
2021
 
2022
+ FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator(
2023
+ SequenceNumber read_seqno, const Slice* timestamp) {
2024
+ if (rep_->fragmented_range_dels == nullptr) {
2025
+ return nullptr;
2026
+ }
2027
+ return new FragmentedRangeTombstoneIterator(rep_->fragmented_range_dels,
2028
+ rep_->internal_comparator,
2029
+ read_seqno, timestamp);
2030
+ }
2031
+
1916
2032
  bool BlockBasedTable::FullFilterKeyMayMatch(
1917
2033
  FilterBlockReader* filter, const Slice& internal_key, const bool no_io,
1918
2034
  const SliceTransform* prefix_extractor, GetContext* get_context,
@@ -2155,7 +2271,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
2155
2271
  NewDataBlockIterator<DataBlockIter>(
2156
2272
  read_options, v.handle, &biter, BlockType::kData, get_context,
2157
2273
  &lookup_data_block_context, /*prefetch_buffer=*/nullptr,
2158
- /*for_compaction=*/false, /*async_read=*/false, tmp_status);
2274
+ /*for_compaction=*/false, /*async_read=*/false, tmp_status,
2275
+ /*use_block_cache_for_lookup=*/true);
2159
2276
 
2160
2277
  if (no_io && biter.status().IsIncomplete()) {
2161
2278
  // couldn't get block from block_cache
@@ -2325,7 +2442,7 @@ Status BlockBasedTable::Prefetch(const ReadOptions& read_options,
2325
2442
  read_options, block_handle, &biter, /*type=*/BlockType::kData,
2326
2443
  /*get_context=*/nullptr, &lookup_context,
2327
2444
  /*prefetch_buffer=*/nullptr, /*for_compaction=*/false,
2328
- /*async_read=*/false, tmp_status);
2445
+ /*async_read=*/false, tmp_status, /*use_block_cache_for_lookup=*/true);
2329
2446
 
2330
2447
  if (!biter.status().ok()) {
2331
2448
  // there was an unexpected error while pre-fetching
@@ -2750,7 +2867,7 @@ Status BlockBasedTable::GetKVPairsFromDataBlocks(
2750
2867
  /*input_iter=*/nullptr, /*type=*/BlockType::kData,
2751
2868
  /*get_context=*/nullptr, /*lookup_context=*/nullptr,
2752
2869
  /*prefetch_buffer=*/nullptr, /*for_compaction=*/false,
2753
- /*async_read=*/false, tmp_status));
2870
+ /*async_read=*/false, tmp_status, /*use_block_cache_for_lookup=*/true));
2754
2871
  s = datablock_iter->status();
2755
2872
 
2756
2873
  if (!s.ok()) {
@@ -2989,7 +3106,7 @@ Status BlockBasedTable::DumpDataBlocks(std::ostream& out_stream) {
2989
3106
  /*input_iter=*/nullptr, /*type=*/BlockType::kData,
2990
3107
  /*get_context=*/nullptr, /*lookup_context=*/nullptr,
2991
3108
  /*prefetch_buffer=*/nullptr, /*for_compaction=*/false,
2992
- /*async_read=*/false, tmp_status));
3109
+ /*async_read=*/false, tmp_status, /*use_block_cache_for_lookup=*/true));
2993
3110
  s = datablock_iter->status();
2994
3111
 
2995
3112
  if (!s.ok()) {
@@ -138,6 +138,9 @@ class BlockBasedTable : public TableReader {
138
138
  FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator(
139
139
  const ReadOptions& read_options) override;
140
140
 
141
+ FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator(
142
+ SequenceNumber read_seqno, const Slice* timestamp) override;
143
+
141
144
  // @param skip_filters Disables loading/accessing the filter block
142
145
  Status Get(const ReadOptions& readOptions, const Slice& key,
143
146
  GetContext* get_context, const SliceTransform* prefix_extractor,
@@ -277,6 +280,11 @@ class BlockBasedTable : public TableReader {
277
280
  Status GetKVPairsFromDataBlocks(const ReadOptions& read_options,
278
281
  std::vector<KVPairBlock>* kv_pair_blocks);
279
282
 
283
+ template <typename TBlocklike>
284
+ Status LookupAndPinBlocksInCache(
285
+ const ReadOptions& ro, const BlockHandle& handle,
286
+ CachableEntry<TBlocklike>* out_parsed_block) const;
287
+
280
288
  struct Rep;
281
289
 
282
290
  Rep* get_rep() { return rep_; }
@@ -284,14 +292,12 @@ class BlockBasedTable : public TableReader {
284
292
 
285
293
  // input_iter: if it is not null, update this one and return it as Iterator
286
294
  template <typename TBlockIter>
287
- TBlockIter* NewDataBlockIterator(const ReadOptions& ro,
288
- const BlockHandle& block_handle,
289
- TBlockIter* input_iter, BlockType block_type,
290
- GetContext* get_context,
291
- BlockCacheLookupContext* lookup_context,
292
- FilePrefetchBuffer* prefetch_buffer,
293
- bool for_compaction, bool async_read,
294
- Status& s) const;
295
+ TBlockIter* NewDataBlockIterator(
296
+ const ReadOptions& ro, const BlockHandle& block_handle,
297
+ TBlockIter* input_iter, BlockType block_type, GetContext* get_context,
298
+ BlockCacheLookupContext* lookup_context,
299
+ FilePrefetchBuffer* prefetch_buffer, bool for_compaction, bool async_read,
300
+ Status& s, bool use_block_cache_for_lookup) const;
295
301
 
296
302
  // input_iter: if it is not null, update this one and return it as Iterator
297
303
  template <typename TBlockIter>
@@ -348,7 +354,8 @@ class BlockBasedTable : public TableReader {
348
354
  const BlockHandle& handle, const UncompressionDict& uncompression_dict,
349
355
  bool for_compaction, CachableEntry<TBlocklike>* block_entry,
350
356
  GetContext* get_context, BlockCacheLookupContext* lookup_context,
351
- BlockContents* contents, bool async_read) const;
357
+ BlockContents* contents, bool async_read,
358
+ bool use_block_cache_for_lookup) const;
352
359
 
353
360
  // Similar to the above, with one crucial difference: it will retrieve the
354
361
  // block from the file even if there are no caches configured (assuming the
@@ -359,7 +366,7 @@ class BlockBasedTable : public TableReader {
359
366
  const BlockHandle& handle, const UncompressionDict& uncompression_dict,
360
367
  CachableEntry<TBlocklike>* block_entry, GetContext* get_context,
361
368
  BlockCacheLookupContext* lookup_context, bool for_compaction,
362
- bool use_cache, bool async_read) const;
369
+ bool use_cache, bool async_read, bool use_block_cache_for_lookup) const;
363
370
 
364
371
  template <typename TBlocklike>
365
372
  WithBlocklikeCheck<void, TBlocklike> SaveLookupContextOrTraceRecord(
@@ -408,7 +415,8 @@ class BlockBasedTable : public TableReader {
408
415
  template <typename TBlocklike>
409
416
  WithBlocklikeCheck<Status, TBlocklike> GetDataBlockFromCache(
410
417
  const Slice& cache_key, BlockCacheInterface<TBlocklike> block_cache,
411
- CachableEntry<TBlocklike>* block, GetContext* get_context) const;
418
+ CachableEntry<TBlocklike>* block, GetContext* get_context,
419
+ const UncompressionDict* dict) const;
412
420
 
413
421
  // Put a maybe compressed block to the corresponding block caches.
414
422
  // This method will perform decompression against block_contents if needed
@@ -423,7 +431,9 @@ class BlockBasedTable : public TableReader {
423
431
  template <typename TBlocklike>
424
432
  WithBlocklikeCheck<Status, TBlocklike> PutDataBlockToCache(
425
433
  const Slice& cache_key, BlockCacheInterface<TBlocklike> block_cache,
426
- CachableEntry<TBlocklike>* cached_block, BlockContents&& block_contents,
434
+ CachableEntry<TBlocklike>* cached_block,
435
+ BlockContents&& uncompressed_block_contents,
436
+ BlockContents&& compressed_block_contents,
427
437
  CompressionType block_comp_type,
428
438
  const UncompressionDict& uncompression_dict,
429
439
  MemoryAllocator* memory_allocator, GetContext* get_context) const;
@@ -682,31 +692,34 @@ struct BlockBasedTable::Rep {
682
692
  uint64_t sst_number_for_tracing() const {
683
693
  return file ? TableFileNameToNumber(file->file_name()) : UINT64_MAX;
684
694
  }
685
- void CreateFilePrefetchBuffer(size_t readahead_size,
686
- size_t max_readahead_size,
687
- std::unique_ptr<FilePrefetchBuffer>* fpb,
688
- bool implicit_auto_readahead,
689
- uint64_t num_file_reads,
690
- uint64_t num_file_reads_for_auto_readahead,
691
- uint64_t upper_bound_offset) const {
695
+ void CreateFilePrefetchBuffer(
696
+ size_t readahead_size, size_t max_readahead_size,
697
+ std::unique_ptr<FilePrefetchBuffer>* fpb, bool implicit_auto_readahead,
698
+ uint64_t num_file_reads, uint64_t num_file_reads_for_auto_readahead,
699
+ uint64_t upper_bound_offset,
700
+ const std::function<void(uint64_t, size_t, size_t&)>& readaheadsize_cb,
701
+ FilePrefetchBufferUsage usage) const {
692
702
  fpb->reset(new FilePrefetchBuffer(
693
703
  readahead_size, max_readahead_size,
694
704
  !ioptions.allow_mmap_reads /* enable */, false /* track_min_offset */,
695
705
  implicit_auto_readahead, num_file_reads,
696
706
  num_file_reads_for_auto_readahead, upper_bound_offset,
697
- ioptions.fs.get(), ioptions.clock, ioptions.stats));
707
+ ioptions.fs.get(), ioptions.clock, ioptions.stats, readaheadsize_cb,
708
+ usage));
698
709
  }
699
710
 
700
711
  void CreateFilePrefetchBufferIfNotExists(
701
712
  size_t readahead_size, size_t max_readahead_size,
702
713
  std::unique_ptr<FilePrefetchBuffer>* fpb, bool implicit_auto_readahead,
703
714
  uint64_t num_file_reads, uint64_t num_file_reads_for_auto_readahead,
704
- uint64_t upper_bound_offset) const {
715
+ uint64_t upper_bound_offset,
716
+ const std::function<void(uint64_t, size_t, size_t&)>& readaheadsize_cb,
717
+ FilePrefetchBufferUsage usage = FilePrefetchBufferUsage::kUnknown) const {
705
718
  if (!(*fpb)) {
706
719
  CreateFilePrefetchBuffer(readahead_size, max_readahead_size, fpb,
707
720
  implicit_auto_readahead, num_file_reads,
708
721
  num_file_reads_for_auto_readahead,
709
- upper_bound_offset);
722
+ upper_bound_offset, readaheadsize_cb, usage);
710
723
  }
711
724
  }
712
725
 
@@ -49,7 +49,7 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
49
49
  BlockType block_type, GetContext* get_context,
50
50
  BlockCacheLookupContext* lookup_context,
51
51
  FilePrefetchBuffer* prefetch_buffer, bool for_compaction, bool async_read,
52
- Status& s) const {
52
+ Status& s, bool use_block_cache_for_lookup) const {
53
53
  using IterBlocklike = typename IterTraits<TBlockIter>::IterBlocklike;
54
54
  PERF_TIMER_GUARD(new_table_block_iter_nanos);
55
55
 
@@ -67,9 +67,13 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
67
67
  // might already be under way and this would invalidate it. Also, the
68
68
  // uncompression dict is typically at the end of the file and would
69
69
  // most likely break the sequentiality of the access pattern.
70
+ // Same is with auto_readahead_size. It iterates over index to lookup for
71
+ // data blocks. And this could break the the sequentiality of the access
72
+ // pattern.
70
73
  s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
71
- ro.async_io ? nullptr : prefetch_buffer, ro, no_io, ro.verify_checksums,
72
- get_context, lookup_context, &uncompression_dict);
74
+ ((ro.async_io || ro.auto_readahead_size) ? nullptr : prefetch_buffer),
75
+ ro, no_io, ro.verify_checksums, get_context, lookup_context,
76
+ &uncompression_dict);
73
77
  if (!s.ok()) {
74
78
  iter->Invalidate(s);
75
79
  return iter;
@@ -77,15 +81,15 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
77
81
  const UncompressionDict& dict = uncompression_dict.GetValue()
78
82
  ? *uncompression_dict.GetValue()
79
83
  : UncompressionDict::GetEmptyDict();
80
- s = RetrieveBlock(prefetch_buffer, ro, handle, dict,
81
- &block.As<IterBlocklike>(), get_context, lookup_context,
82
- for_compaction,
83
- /* use_cache */ true, async_read);
84
+ s = RetrieveBlock(
85
+ prefetch_buffer, ro, handle, dict, &block.As<IterBlocklike>(),
86
+ get_context, lookup_context, for_compaction,
87
+ /* use_cache */ true, async_read, use_block_cache_for_lookup);
84
88
  } else {
85
89
  s = RetrieveBlock(
86
90
  prefetch_buffer, ro, handle, UncompressionDict::GetEmptyDict(),
87
91
  &block.As<IterBlocklike>(), get_context, lookup_context, for_compaction,
88
- /* use_cache */ true, async_read);
92
+ /* use_cache */ true, async_read, use_block_cache_for_lookup);
89
93
  }
90
94
 
91
95
  if (s.IsTryAgain() && async_read) {
@@ -50,12 +50,12 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
50
50
  }
51
51
 
52
52
  // XXX: use_cache=true means double cache query?
53
- statuses[idx_in_batch] =
54
- RetrieveBlock(nullptr, options, handle, uncompression_dict,
55
- &results[idx_in_batch].As<Block_kData>(),
56
- mget_iter->get_context, /* lookup_context */ nullptr,
57
- /* for_compaction */ false, /* use_cache */ true,
58
- /* async_read */ false);
53
+ statuses[idx_in_batch] = RetrieveBlock(
54
+ nullptr, options, handle, uncompression_dict,
55
+ &results[idx_in_batch].As<Block_kData>(), mget_iter->get_context,
56
+ /* lookup_context */ nullptr,
57
+ /* for_compaction */ false, /* use_cache */ true,
58
+ /* async_read */ false, /* use_block_cache_for_lookup */ true);
59
59
  }
60
60
  assert(idx_in_batch == handles->size());
61
61
  CO_RETURN;
@@ -269,7 +269,7 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
269
269
  nullptr, options, handle, uncompression_dict,
270
270
  /*for_compaction=*/false, block_entry, mget_iter->get_context,
271
271
  /*lookup_context=*/nullptr, &serialized_block,
272
- /*async_read=*/false);
272
+ /*async_read=*/false, /*use_block_cache_for_lookup=*/true);
273
273
 
274
274
  // block_entry value could be null if no block cache is present, i.e
275
275
  // BlockBasedTableOptions::no_block_cache is true and no compressed
@@ -402,6 +402,7 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
402
402
  BCI block_cache{rep_->table_options.block_cache.get()};
403
403
  std::array<BCI::TypedAsyncLookupHandle, MultiGetContext::MAX_BATCH_SIZE>
404
404
  async_handles;
405
+ BlockCreateContext create_ctx = rep_->create_context;
405
406
  std::array<CacheKey, MultiGetContext::MAX_BATCH_SIZE> cache_keys;
406
407
  size_t cache_lookup_count = 0;
407
408
 
@@ -448,6 +449,9 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
448
449
  sst_file_range.SkipKey(miter);
449
450
  continue;
450
451
  }
452
+ create_ctx.dict = uncompression_dict.GetValue()
453
+ ? uncompression_dict.GetValue()
454
+ : &UncompressionDict::GetEmptyDict();
451
455
 
452
456
  if (v.handle.offset() == prev_offset) {
453
457
  // This key can reuse the previous block (later on).
@@ -475,7 +479,7 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
475
479
  GetCacheKey(rep_->base_cache_key, v.handle);
476
480
  async_handle.key = cache_keys[cache_lookup_count].AsSlice();
477
481
  // NB: StartAsyncLookupFull populates async_handle.helper
478
- async_handle.create_context = &rep_->create_context;
482
+ async_handle.create_context = &create_ctx;
479
483
  async_handle.priority = GetCachePriority<Block_kData>();
480
484
  async_handle.stats = rep_->ioptions.statistics.get();
481
485
 
@@ -628,7 +632,8 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
628
632
  read_options, iiter->value().handle, &next_biter,
629
633
  BlockType::kData, get_context, lookup_data_block_context,
630
634
  /* prefetch_buffer= */ nullptr, /* for_compaction = */ false,
631
- /*async_read = */ false, tmp_s);
635
+ /*async_read = */ false, tmp_s,
636
+ /* use_block_cache_for_lookup = */ true);
632
637
  biter = &next_biter;
633
638
  reusing_prev_block = false;
634
639
  later_reused = false;