@nxtedition/rocksdb 8.2.0 → 8.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (321)
  1. package/binding.cc +3 -3
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +16 -52
  3. package/deps/rocksdb/rocksdb/Makefile +10 -5
  4. package/deps/rocksdb/rocksdb/TARGETS +8 -345
  5. package/deps/rocksdb/rocksdb/cache/cache_test.cc +92 -0
  6. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +32 -32
  7. package/deps/rocksdb/rocksdb/cache/clock_cache.h +12 -9
  8. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +6 -43
  9. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +3 -13
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +8 -5
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +21 -47
  12. package/deps/rocksdb/rocksdb/cache/lru_cache.h +3 -8
  13. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +2 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +1 -2
  15. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +44 -7
  16. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +13 -14
  17. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +1 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -0
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +2 -1
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +17 -8
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +40 -21
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
  24. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +41 -42
  25. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +1 -1
  26. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +1 -1
  27. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +5 -4
  28. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +2 -2
  29. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +5 -3
  30. package/deps/rocksdb/rocksdb/db/builder.cc +7 -6
  31. package/deps/rocksdb/rocksdb/db/builder.h +2 -2
  32. package/deps/rocksdb/rocksdb/db/c.cc +76 -5
  33. package/deps/rocksdb/rocksdb/db/c_test.c +141 -0
  34. package/deps/rocksdb/rocksdb/db/column_family.cc +32 -0
  35. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +3 -2
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +5 -0
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -5
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +12 -10
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +21 -17
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -2
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +8 -7
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +3 -1
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +1 -1
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +77 -50
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +4 -5
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +55 -8
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +142 -56
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +1 -1
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +1 -2
  50. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +21 -20
  51. package/deps/rocksdb/rocksdb/db/convenience.cc +8 -6
  52. package/deps/rocksdb/rocksdb/db/corruption_test.cc +5 -4
  53. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +6 -3
  54. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +260 -220
  55. package/deps/rocksdb/rocksdb/db/db_clip_test.cc +142 -0
  56. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +1 -1
  57. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +333 -27
  58. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +5 -0
  59. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +7 -0
  60. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +189 -27
  61. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +23 -10
  62. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +134 -90
  63. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +2 -2
  64. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +5 -3
  65. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -1
  66. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +124 -16
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +10 -0
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +7 -0
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +15 -0
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +11 -5
  71. package/deps/rocksdb/rocksdb/db/db_iter.cc +7 -8
  72. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +54 -3
  73. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +42 -0
  74. package/deps/rocksdb/rocksdb/db/db_options_test.cc +116 -1
  75. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +3 -2
  76. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +3 -2
  77. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +9 -8
  78. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +142 -63
  79. package/deps/rocksdb/rocksdb/db/db_test.cc +28 -7
  80. package/deps/rocksdb/rocksdb/db/db_test2.cc +71 -131
  81. package/deps/rocksdb/rocksdb/db/db_test_util.cc +18 -0
  82. package/deps/rocksdb/rocksdb/db/db_test_util.h +6 -0
  83. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +10 -10
  84. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +25 -0
  85. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +88 -0
  86. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +67 -0
  87. package/deps/rocksdb/rocksdb/db/db_write_test.cc +5 -0
  88. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
  89. package/deps/rocksdb/rocksdb/db/experimental.cc +4 -2
  90. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +86 -1
  91. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +15 -2
  92. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +1 -2
  93. package/deps/rocksdb/rocksdb/db/flush_job.cc +21 -14
  94. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +14 -7
  95. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +31 -8
  96. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +21 -19
  97. package/deps/rocksdb/rocksdb/db/internal_stats.cc +42 -12
  98. package/deps/rocksdb/rocksdb/db/internal_stats.h +1 -0
  99. package/deps/rocksdb/rocksdb/db/kv_checksum.h +92 -6
  100. package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -2
  101. package/deps/rocksdb/rocksdb/db/log_format.h +8 -4
  102. package/deps/rocksdb/rocksdb/db/log_reader.cc +129 -51
  103. package/deps/rocksdb/rocksdb/db/log_reader.h +16 -0
  104. package/deps/rocksdb/rocksdb/db/log_test.cc +125 -4
  105. package/deps/rocksdb/rocksdb/db/log_writer.cc +32 -2
  106. package/deps/rocksdb/rocksdb/db/log_writer.h +16 -0
  107. package/deps/rocksdb/rocksdb/db/memtable.cc +17 -46
  108. package/deps/rocksdb/rocksdb/db/memtable.h +1 -1
  109. package/deps/rocksdb/rocksdb/db/memtable_list.cc +8 -4
  110. package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -1
  111. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -1
  112. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +5 -4
  113. package/deps/rocksdb/rocksdb/db/repair.cc +38 -11
  114. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +3 -3
  115. package/deps/rocksdb/rocksdb/db/table_cache.cc +68 -51
  116. package/deps/rocksdb/rocksdb/db/table_cache.h +20 -10
  117. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -1
  118. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +6 -3
  119. package/deps/rocksdb/rocksdb/db/version_builder.cc +9 -5
  120. package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
  121. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +140 -120
  122. package/deps/rocksdb/rocksdb/db/version_edit.cc +14 -0
  123. package/deps/rocksdb/rocksdb/db/version_edit.h +12 -4
  124. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +21 -13
  125. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +26 -16
  126. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +9 -9
  127. package/deps/rocksdb/rocksdb/db/version_set.cc +292 -96
  128. package/deps/rocksdb/rocksdb/db/version_set.h +53 -28
  129. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -0
  130. package/deps/rocksdb/rocksdb/db/version_set_test.cc +62 -22
  131. package/deps/rocksdb/rocksdb/db/version_util.h +5 -4
  132. package/deps/rocksdb/rocksdb/db/write_batch.cc +3 -1
  133. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
  134. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +119 -27
  135. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +123 -0
  136. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  137. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +7 -2
  138. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +34 -0
  139. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +13 -0
  140. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +43 -33
  141. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +29 -17
  142. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
  143. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +6 -1
  144. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +85 -50
  145. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +96 -54
  146. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.cc +122 -0
  147. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +206 -0
  148. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +9 -1
  149. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +9 -3
  150. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +322 -92
  151. package/deps/rocksdb/rocksdb/env/env_posix.cc +12 -8
  152. package/deps/rocksdb/rocksdb/env/env_test.cc +31 -0
  153. package/deps/rocksdb/rocksdb/env/mock_env.cc +1 -1
  154. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +14 -0
  155. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -1
  156. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +5 -1
  157. package/deps/rocksdb/rocksdb/file/file_util.cc +3 -3
  158. package/deps/rocksdb/rocksdb/file/file_util.h +2 -0
  159. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +89 -0
  160. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +22 -7
  161. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -2
  162. package/deps/rocksdb/rocksdb/file/readahead_raf.cc +1 -1
  163. package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +1 -1
  164. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +1 -1
  165. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +3 -0
  166. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +154 -74
  167. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +27 -7
  168. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +107 -28
  169. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +19 -0
  170. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +8 -0
  171. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +2 -0
  172. package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +7 -1
  173. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +137 -152
  174. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +61 -26
  175. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +30 -26
  176. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +33 -16
  177. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +87 -8
  178. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +1 -1
  179. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +5 -0
  180. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
  181. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +1 -0
  182. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -0
  183. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +0 -1
  184. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  185. package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +9 -2
  186. package/deps/rocksdb/rocksdb/logging/env_logger.h +2 -0
  187. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +78 -42
  188. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +14 -9
  189. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +1 -0
  190. package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -0
  191. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +4 -9
  192. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +19 -11
  193. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +1 -1
  194. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +211 -555
  195. package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +1 -1
  196. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +36 -2
  197. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +17 -7
  198. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.h +10 -7
  199. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +19 -18
  200. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.h +10 -2
  201. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +14 -0
  202. package/deps/rocksdb/rocksdb/options/cf_options.cc +35 -2
  203. package/deps/rocksdb/rocksdb/options/cf_options.h +5 -0
  204. package/deps/rocksdb/rocksdb/options/customizable_test.cc +1 -1
  205. package/deps/rocksdb/rocksdb/options/options.cc +12 -53
  206. package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -0
  207. package/deps/rocksdb/rocksdb/options/options_parser.cc +11 -0
  208. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +32 -4
  209. package/deps/rocksdb/rocksdb/options/options_test.cc +89 -5
  210. package/deps/rocksdb/rocksdb/port/lang.h +27 -0
  211. package/deps/rocksdb/rocksdb/port/stack_trace.cc +67 -24
  212. package/deps/rocksdb/rocksdb/src.mk +2 -0
  213. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -3
  214. package/deps/rocksdb/rocksdb/table/block_based/block.cc +195 -35
  215. package/deps/rocksdb/rocksdb/table/block_based/block.h +197 -24
  216. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +71 -51
  217. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +7 -1
  218. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +4 -6
  219. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +3 -0
  220. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +43 -2
  221. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +36 -6
  222. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +266 -166
  223. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +44 -14
  224. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -1
  225. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +63 -56
  226. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +8 -2
  227. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +4 -2
  228. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +10 -0
  229. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +14 -2
  230. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +918 -2
  231. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -2
  232. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +10 -9
  233. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -8
  234. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +2 -2
  235. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +1 -1
  236. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +18 -23
  237. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +8 -8
  238. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +16 -32
  239. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +7 -8
  240. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +4 -5
  241. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +3 -3
  242. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +46 -53
  243. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +12 -12
  244. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +7 -9
  245. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +26 -23
  246. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +2 -1
  247. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +3 -0
  248. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +4 -2
  249. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +3 -2
  250. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +7 -1
  251. package/deps/rocksdb/rocksdb/table/block_fetcher.h +1 -1
  252. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
  253. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +3 -2
  254. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +5 -2
  255. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +4 -2
  256. package/deps/rocksdb/rocksdb/table/format.cc +4 -4
  257. package/deps/rocksdb/rocksdb/table/format.h +1 -1
  258. package/deps/rocksdb/rocksdb/table/get_context.cc +1 -1
  259. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +33 -22
  260. package/deps/rocksdb/rocksdb/table/meta_blocks.h +4 -0
  261. package/deps/rocksdb/rocksdb/table/mock_table.cc +4 -2
  262. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +1 -1
  263. package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +1 -1
  264. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +18 -10
  265. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +4 -3
  266. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +10 -7
  267. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +4 -2
  268. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +11 -0
  269. package/deps/rocksdb/rocksdb/table/table_builder.h +14 -5
  270. package/deps/rocksdb/rocksdb/table/table_properties.cc +2 -0
  271. package/deps/rocksdb/rocksdb/table/table_reader.h +6 -3
  272. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +1 -1
  273. package/deps/rocksdb/rocksdb/table/table_test.cc +291 -34
  274. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +3 -1
  275. package/deps/rocksdb/rocksdb/test_util/testharness.h +5 -0
  276. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -2
  277. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +33 -17
  278. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +3 -1
  279. package/deps/rocksdb/rocksdb/util/bloom_impl.h +2 -2
  280. package/deps/rocksdb/rocksdb/util/compression.h +1 -1
  281. package/deps/rocksdb/rocksdb/util/crc32c.cc +24 -83
  282. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +7 -9
  283. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +4 -1
  284. package/deps/rocksdb/rocksdb/util/filter_bench.cc +1 -1
  285. package/deps/rocksdb/rocksdb/util/gflags_compat.h +9 -10
  286. package/deps/rocksdb/rocksdb/util/math.h +12 -7
  287. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +16 -18
  288. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +46 -2
  289. package/deps/rocksdb/rocksdb/util/ribbon_test.cc +6 -6
  290. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +12 -7
  291. package/deps/rocksdb/rocksdb/util/stop_watch.h +31 -13
  292. package/deps/rocksdb/rocksdb/util/thread_list_test.cc +2 -0
  293. package/deps/rocksdb/rocksdb/util/thread_operation.h +2 -1
  294. package/deps/rocksdb/rocksdb/util/udt_util.h +77 -0
  295. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.cc +2 -2
  296. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +1 -1
  297. package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.cc +1 -1
  298. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +1 -1
  299. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -1
  300. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -1
  301. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +11 -1
  302. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +34 -1
  303. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +15 -0
  304. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +1 -1
  305. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +5 -1
  306. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +29 -1
  307. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +0 -1
  308. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +0 -1
  309. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +6 -1
  310. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +10 -0
  311. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +6 -1
  312. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +5 -0
  313. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +5 -0
  314. package/package.json +1 -1
  315. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  316. package/prebuilds/linux-x64/node.napi.node +0 -0
  317. /package/deps/rocksdb/rocksdb/memory/{memory_allocator.h → memory_allocator_impl.h} +0 -0
  318. /package/deps/rocksdb/rocksdb/monitoring/{statistics.h → statistics_impl.h} +0 -0
  319. /package/deps/rocksdb/rocksdb/table/block_based/{flush_block_policy.h → flush_block_policy_impl.h} +0 -0
  320. /package/deps/rocksdb/rocksdb/util/{rate_limiter.h → rate_limiter_impl.h} +0 -0
  321. /package/deps/rocksdb/rocksdb/utilities/agg_merge/{agg_merge.h → agg_merge_impl.h} +0 -0
@@ -98,7 +98,8 @@ class BlockBasedTable : public TableReader {
98
98
  const BlockBasedTableOptions& table_options,
99
99
  const InternalKeyComparator& internal_key_comparator,
100
100
  std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
101
- std::unique_ptr<TableReader>* table_reader,
101
+ uint8_t block_protection_bytes_per_key,
102
+ std::unique_ptr<TableReader>* table_reader, uint64_t tail_size,
102
103
  std::shared_ptr<CacheReservationManager> table_reader_cache_res_mgr =
103
104
  nullptr,
104
105
  const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr,
@@ -116,7 +117,8 @@ class BlockBasedTable : public TableReader {
116
117
  const ReadOptions& read_options,
117
118
  const SliceTransform* options_prefix_extractor,
118
119
  const bool need_upper_bound_check,
119
- BlockCacheLookupContext* lookup_context) const;
120
+ BlockCacheLookupContext* lookup_context,
121
+ bool* filter_checked) const;
120
122
 
121
123
  // Returns a new iterator over the table contents.
122
124
  // The result of NewIterator() is initially invalid (caller must
@@ -153,7 +155,8 @@ class BlockBasedTable : public TableReader {
153
155
  // Pre-fetch the disk blocks that correspond to the key range specified by
154
156
  // (kbegin, kend). The call will return error status in the event of
155
157
  // IO or iteration error.
156
- Status Prefetch(const Slice* begin, const Slice* end) override;
158
+ Status Prefetch(const ReadOptions& read_options, const Slice* begin,
159
+ const Slice* end) override;
157
160
 
158
161
  // Given a key, return an approximate byte offset in the file where
159
162
  // the data for that key begins (or would begin if the key were
@@ -161,15 +164,16 @@ class BlockBasedTable : public TableReader {
161
164
  // bytes, and so includes effects like compression of the underlying data.
162
165
  // E.g., the approximate offset of the last key in the table will
163
166
  // be close to the file length.
164
- uint64_t ApproximateOffsetOf(const Slice& key,
167
+ uint64_t ApproximateOffsetOf(const ReadOptions& read_options,
168
+ const Slice& key,
165
169
  TableReaderCaller caller) override;
166
170
 
167
171
  // Given start and end keys, return the approximate data size in the file
168
172
  // between the keys. The returned value is in terms of file bytes, and so
169
173
  // includes effects like compression of the underlying data.
170
174
  // The start key must not be greater than the end key.
171
- uint64_t ApproximateSize(const Slice& start, const Slice& end,
172
- TableReaderCaller caller) override;
175
+ uint64_t ApproximateSize(const ReadOptions& read_options, const Slice& start,
176
+ const Slice& end, TableReaderCaller caller) override;
173
177
 
174
178
  Status ApproximateKeyAnchors(const ReadOptions& read_options,
175
179
  std::vector<Anchor>& anchors) override;
@@ -222,8 +226,9 @@ class BlockBasedTable : public TableReader {
222
226
  virtual size_t ApproximateMemoryUsage() const = 0;
223
227
  // Cache the dependencies of the index reader (e.g. the partitions
224
228
  // of a partitioned index).
225
- virtual Status CacheDependencies(const ReadOptions& /*ro*/,
226
- bool /* pin */) {
229
+ virtual Status CacheDependencies(
230
+ const ReadOptions& /*ro*/, bool /* pin */,
231
+ FilePrefetchBuffer* /* tail_prefetch_buffer */) {
227
232
  return Status::OK();
228
233
  }
229
234
  };
@@ -244,6 +249,9 @@ class BlockBasedTable : public TableReader {
244
249
  bool redundant,
245
250
  Statistics* const statistics);
246
251
 
252
+ Statistics* GetStatistics() const;
253
+ bool IsLastLevel() const;
254
+
247
255
  // Get the size to read from storage for a BlockHandle. size_t because we
248
256
  // are about to load into memory.
249
257
  static inline size_t BlockSizeWithTrailer(const BlockHandle& handle) {
@@ -265,7 +273,8 @@ class BlockBasedTable : public TableReader {
265
273
 
266
274
  // Retrieve all key value pairs from data blocks in the table.
267
275
  // The key retrieved are internal keys.
268
- Status GetKVPairsFromDataBlocks(std::vector<KVPairBlock>* kv_pair_blocks);
276
+ Status GetKVPairsFromDataBlocks(const ReadOptions& read_options,
277
+ std::vector<KVPairBlock>* kv_pair_blocks);
269
278
 
270
279
  struct Rep;
271
280
 
@@ -351,11 +360,22 @@ class BlockBasedTable : public TableReader {
351
360
  BlockCacheLookupContext* lookup_context, bool for_compaction,
352
361
  bool use_cache, bool async_read) const;
353
362
 
363
+ template <typename TBlocklike>
364
+ WithBlocklikeCheck<void, TBlocklike> SaveLookupContextOrTraceRecord(
365
+ const Slice& block_key, bool is_cache_hit, const ReadOptions& ro,
366
+ const TBlocklike* parsed_block_value,
367
+ BlockCacheLookupContext* lookup_context) const;
368
+
369
+ void FinishTraceRecord(const BlockCacheLookupContext& lookup_context,
370
+ const Slice& block_key, const Slice& referenced_key,
371
+ bool does_referenced_key_exist,
372
+ uint64_t referenced_data_size) const;
373
+
354
374
  DECLARE_SYNC_AND_ASYNC_CONST(
355
375
  void, RetrieveMultipleBlocks, const ReadOptions& options,
356
376
  const MultiGetRange* batch,
357
377
  const autovector<BlockHandle, MultiGetContext::MAX_BATCH_SIZE>* handles,
358
- Status* statuses, CachableEntry<Block>* results, char* scratch,
378
+ Status* statuses, CachableEntry<Block_kData>* results, char* scratch,
359
379
  const UncompressionDict& uncompression_dict);
360
380
 
361
381
  // Get the iterator from the index reader.
@@ -429,13 +449,13 @@ class BlockBasedTable : public TableReader {
429
449
  const SliceTransform* prefix_extractor,
430
450
  GetContext* get_context,
431
451
  BlockCacheLookupContext* lookup_context,
432
- Env::IOPriority rate_limiter_priority) const;
452
+ const ReadOptions& read_options) const;
433
453
 
434
454
  void FullFilterKeysMayMatch(FilterBlockReader* filter, MultiGetRange* range,
435
455
  const bool no_io,
436
456
  const SliceTransform* prefix_extractor,
437
457
  BlockCacheLookupContext* lookup_context,
438
- Env::IOPriority rate_limiter_priority) const;
458
+ const ReadOptions& read_options) const;
439
459
 
440
460
  // If force_direct_prefetch is true, always prefetching to RocksDB
441
461
  // buffer, rather than calling RandomAccessFile::Prefetch().
@@ -443,7 +463,8 @@ class BlockBasedTable : public TableReader {
443
463
  const ReadOptions& ro, RandomAccessFileReader* file, uint64_t file_size,
444
464
  bool force_direct_prefetch, TailPrefetchStats* tail_prefetch_stats,
445
465
  const bool prefetch_all, const bool preload_all,
446
- std::unique_ptr<FilePrefetchBuffer>* prefetch_buffer, Statistics* stats);
466
+ std::unique_ptr<FilePrefetchBuffer>* prefetch_buffer, Statistics* stats,
467
+ uint64_t tail_size, Logger* const logger);
447
468
  Status ReadMetaIndexBlock(const ReadOptions& ro,
448
469
  FilePrefetchBuffer* prefetch_buffer,
449
470
  std::unique_ptr<Block>* metaindex_block,
@@ -466,7 +487,8 @@ class BlockBasedTable : public TableReader {
466
487
 
467
488
  static BlockType GetBlockTypeForMetaBlockByName(const Slice& meta_block_name);
468
489
 
469
- Status VerifyChecksumInMetaBlocks(InternalIteratorBase<Slice>* index_iter);
490
+ Status VerifyChecksumInMetaBlocks(const ReadOptions& read_options,
491
+ InternalIteratorBase<Slice>* index_iter);
470
492
  Status VerifyChecksumInBlocks(const ReadOptions& read_options,
471
493
  InternalIteratorBase<IndexValue>* index_iter);
472
494
 
@@ -494,6 +516,8 @@ class BlockBasedTable : public TableReader {
494
516
  // in building the table file, otherwise true.
495
517
  bool PrefixExtractorChanged(const SliceTransform* prefix_extractor) const;
496
518
 
519
+ bool TimestampMayMatch(const ReadOptions& read_options) const;
520
+
497
521
  // A cumulative data block file read in MultiGet lower than this size will
498
522
  // use a stack buffer
499
523
  static constexpr size_t kMultiGetReadStackBufSize = 8192;
@@ -595,6 +619,12 @@ struct BlockBasedTable::Rep {
595
619
  // move is involved
596
620
  int level;
597
621
 
622
+ // the timestamp range of table
623
+ // Points into memory owned by TableProperties. This would need to change if
624
+ // TableProperties become subject to cache eviction.
625
+ Slice min_timestamp;
626
+ Slice max_timestamp;
627
+
598
628
  // If false, blocks in this file are definitely all uncompressed. Knowing this
599
629
  // before reading individual blocks enables certain optimizations.
600
630
  bool blocks_maybe_compressed = true;
@@ -68,7 +68,7 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
68
68
  // uncompression dict is typically at the end of the file and would
69
69
  // most likely break the sequentiality of the access pattern.
70
70
  s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
71
- ro.async_io ? nullptr : prefetch_buffer, no_io, ro.verify_checksums,
71
+ ro.async_io ? nullptr : prefetch_buffer, ro, no_io, ro.verify_checksums,
72
72
  get_context, lookup_context, &uncompression_dict);
73
73
  if (!s.ok()) {
74
74
  iter->Invalidate(s);
@@ -32,7 +32,7 @@ namespace ROCKSDB_NAMESPACE {
32
32
  DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
33
33
  (const ReadOptions& options, const MultiGetRange* batch,
34
34
  const autovector<BlockHandle, MultiGetContext::MAX_BATCH_SIZE>* handles,
35
- Status* statuses, CachableEntry<Block>* results, char* scratch,
35
+ Status* statuses, CachableEntry<Block_kData>* results, char* scratch,
36
36
  const UncompressionDict& uncompression_dict) const {
37
37
  RandomAccessFileReader* file = rep_->file.get();
38
38
  const Footer& footer = rep_->footer;
@@ -44,17 +44,16 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
44
44
  size_t idx_in_batch = 0;
45
45
  for (auto mget_iter = batch->begin(); mget_iter != batch->end();
46
46
  ++mget_iter, ++idx_in_batch) {
47
- BlockCacheLookupContext lookup_data_block_context(
48
- TableReaderCaller::kUserMultiGet);
49
47
  const BlockHandle& handle = (*handles)[idx_in_batch];
50
48
  if (handle.IsNull()) {
51
49
  continue;
52
50
  }
53
51
 
52
+ // XXX: use_cache=true means double cache query?
54
53
  statuses[idx_in_batch] =
55
54
  RetrieveBlock(nullptr, options, handle, uncompression_dict,
56
55
  &results[idx_in_batch].As<Block_kData>(),
57
- mget_iter->get_context, &lookup_data_block_context,
56
+ mget_iter->get_context, /* lookup_context */ nullptr,
58
57
  /* for_compaction */ false, /* use_cache */ true,
59
58
  /* async_read */ false);
60
59
  }
@@ -259,17 +258,15 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
259
258
 
260
259
  if (s.ok()) {
261
260
  if (options.fill_cache) {
262
- BlockCacheLookupContext lookup_data_block_context(
263
- TableReaderCaller::kUserMultiGet);
264
- CachableEntry<Block>* block_entry = &results[idx_in_batch];
261
+ CachableEntry<Block_kData>* block_entry = &results[idx_in_batch];
265
262
  // MaybeReadBlockAndLoadToCache will insert into the block caches if
266
263
  // necessary. Since we're passing the serialized block contents, it
267
264
  // will avoid looking up the block cache
268
265
  s = MaybeReadBlockAndLoadToCache(
269
266
  nullptr, options, handle, uncompression_dict,
270
- /*for_compaction=*/false, &block_entry->As<Block_kData>(),
271
- mget_iter->get_context, &lookup_data_block_context,
272
- &serialized_block, /*async_read=*/false);
267
+ /*for_compaction=*/false, block_entry, mget_iter->get_context,
268
+ /*lookup_context=*/nullptr, &serialized_block,
269
+ /*async_read=*/false);
273
270
 
274
271
  // block_entry value could be null if no block cache is present, i.e
275
272
  // BlockBasedTableOptions::no_block_cache is true and no compressed
@@ -301,7 +298,7 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
301
298
  contents = std::move(serialized_block);
302
299
  }
303
300
  if (s.ok()) {
304
- results[idx_in_batch].SetOwnedValue(std::make_unique<Block>(
301
+ results[idx_in_batch].SetOwnedValue(std::make_unique<Block_kData>(
305
302
  std::move(contents), read_amp_bytes_per_bit, ioptions.stats));
306
303
  }
307
304
  }
@@ -331,11 +328,13 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
331
328
  if (sst_file_range.begin()->get_context) {
332
329
  tracing_mget_id = sst_file_range.begin()->get_context->get_tracing_get_id();
333
330
  }
334
- BlockCacheLookupContext lookup_context{
331
+ // TODO: need more than one lookup_context here to track individual filter
332
+ // and index partition hits and misses.
333
+ BlockCacheLookupContext metadata_lookup_context{
335
334
  TableReaderCaller::kUserMultiGet, tracing_mget_id,
336
335
  /*_get_from_user_specified_snapshot=*/read_options.snapshot != nullptr};
337
336
  FullFilterKeysMayMatch(filter, &sst_file_range, no_io, prefix_extractor,
338
- &lookup_context, read_options.rate_limiter_priority);
337
+ &metadata_lookup_context, read_options);
339
338
 
340
339
  if (!sst_file_range.empty()) {
341
340
  IndexBlockIter iiter_on_stack;
@@ -345,9 +344,9 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
345
344
  if (rep_->index_type == BlockBasedTableOptions::kHashSearch) {
346
345
  need_upper_bound_check = PrefixExtractorChanged(prefix_extractor);
347
346
  }
348
- auto iiter =
349
- NewIndexIterator(read_options, need_upper_bound_check, &iiter_on_stack,
350
- sst_file_range.begin()->get_context, &lookup_context);
347
+ auto iiter = NewIndexIterator(
348
+ read_options, need_upper_bound_check, &iiter_on_stack,
349
+ sst_file_range.begin()->get_context, &metadata_lookup_context);
351
350
  std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
352
351
  if (iiter != &iiter_on_stack) {
353
352
  iiter_unique_ptr.reset(iiter);
@@ -355,11 +354,22 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
355
354
 
356
355
  uint64_t prev_offset = std::numeric_limits<uint64_t>::max();
357
356
  autovector<BlockHandle, MultiGetContext::MAX_BATCH_SIZE> block_handles;
358
- std::array<CachableEntry<Block>, MultiGetContext::MAX_BATCH_SIZE> results;
357
+ std::array<CachableEntry<Block_kData>, MultiGetContext::MAX_BATCH_SIZE>
358
+ results;
359
359
  std::array<Status, MultiGetContext::MAX_BATCH_SIZE> statuses;
360
+ // Empty data_lookup_contexts means "unused," when block cache tracing is
361
+ // disabled. (Limited options as element type is not default constructible.)
362
+ std::vector<BlockCacheLookupContext> data_lookup_contexts;
360
363
  MultiGetContext::Mask reused_mask = 0;
361
364
  char stack_buf[kMultiGetReadStackBufSize];
362
365
  std::unique_ptr<char[]> block_buf;
366
+ if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled()) {
367
+ // Awkward because BlockCacheLookupContext is not CopyAssignable
368
+ data_lookup_contexts.reserve(MultiGetContext::MAX_BATCH_SIZE);
369
+ for (size_t i = 0; i < MultiGetContext::MAX_BATCH_SIZE; ++i) {
370
+ data_lookup_contexts.push_back(metadata_lookup_context);
371
+ }
372
+ }
363
373
  {
364
374
  MultiGetRange data_block_range(sst_file_range, sst_file_range.begin(),
365
375
  sst_file_range.end());
@@ -411,9 +421,9 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
411
421
  uncompression_dict_status =
412
422
  rep_->uncompression_dict_reader
413
423
  ->GetOrReadUncompressionDictionary(
414
- nullptr /* prefetch_buffer */, no_io,
424
+ nullptr /* prefetch_buffer */, read_options, no_io,
415
425
  read_options.verify_checksums, get_context,
416
- &lookup_context, &uncompression_dict);
426
+ &metadata_lookup_context, &uncompression_dict);
417
427
  uncompression_dict_inited = true;
418
428
  }
419
429
 
@@ -442,9 +452,6 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
442
452
  // Lookup the cache for the given data block referenced by an index
443
453
  // iterator value (i.e BlockHandle). If it exists in the cache,
444
454
  // initialize block to the contents of the data block.
445
- // TODO?
446
- // BlockCacheLookupContext lookup_data_block_context(
447
- // TableReaderCaller::kUserMultiGet);
448
455
 
449
456
  // An async version of MaybeReadBlockAndLoadToCache /
450
457
  // GetDataBlockFromCache
@@ -492,6 +499,11 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
492
499
  total_len += BlockSizeWithTrailer(block_handles[i]);
493
500
  UpdateCacheMissMetrics(BlockType::kData, get_context);
494
501
  }
502
+ if (!data_lookup_contexts.empty()) {
503
+ // Populate cache key before it's discarded
504
+ data_lookup_contexts[i].block_key =
505
+ async_handles[lookup_idx].key.ToString();
506
+ }
495
507
  ++lookup_idx;
496
508
  }
497
509
  }
@@ -547,24 +559,26 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
547
559
  bool first_block = true;
548
560
  do {
549
561
  DataBlockIter* biter = nullptr;
562
+ uint64_t referenced_data_size = 0;
563
+ Block_kData* parsed_block_value = nullptr;
550
564
  bool reusing_prev_block;
551
565
  bool later_reused;
552
- uint64_t referenced_data_size = 0;
553
566
  bool does_referenced_key_exist = false;
554
- BlockCacheLookupContext lookup_data_block_context(
555
- TableReaderCaller::kUserMultiGet, tracing_mget_id,
556
- /*_get_from_user_specified_snapshot=*/read_options.snapshot !=
557
- nullptr);
567
+ bool handle_present = false;
568
+ BlockCacheLookupContext* lookup_data_block_context =
569
+ data_lookup_contexts.empty() ? nullptr
570
+ : &data_lookup_contexts[idx_in_batch];
558
571
  if (first_block) {
559
- if (!block_handles[idx_in_batch].IsNull() ||
560
- !results[idx_in_batch].IsEmpty()) {
572
+ handle_present = !block_handles[idx_in_batch].IsNull();
573
+ parsed_block_value = results[idx_in_batch].GetValue();
574
+ if (handle_present || parsed_block_value) {
561
575
  first_biter.Invalidate(Status::OK());
562
576
  NewDataBlockIterator<DataBlockIter>(
563
- read_options, results[idx_in_batch], &first_biter,
577
+ read_options, results[idx_in_batch].As<Block>(), &first_biter,
564
578
  statuses[idx_in_batch]);
565
579
  reusing_prev_block = false;
566
580
  } else {
567
- // If handler is null and result is empty, then the status is never
581
+ // If handle is null and result is empty, then the status is never
568
582
  // set, which should be the initial value: ok().
569
583
  assert(statuses[idx_in_batch].ok());
570
584
  reusing_prev_block = true;
@@ -589,7 +603,7 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
589
603
  Status tmp_s;
590
604
  NewDataBlockIterator<DataBlockIter>(
591
605
  read_options, iiter->value().handle, &next_biter,
592
- BlockType::kData, get_context, &lookup_data_block_context,
606
+ BlockType::kData, get_context, lookup_data_block_context,
593
607
  /* prefetch_buffer= */ nullptr, /* for_compaction = */ false,
594
608
  /*async_read = */ false, tmp_s);
595
609
  biter = &next_biter;
@@ -684,35 +698,23 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
684
698
  // Write the block cache access.
685
699
  // XXX: There appear to be 'break' statements above that bypass this
686
700
  // writing of the block cache trace record
687
- if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled() &&
688
- !reusing_prev_block) {
689
- // Avoid making copy of block_key, cf_name, and referenced_key when
690
- // constructing the access record.
701
+ if (lookup_data_block_context && !reusing_prev_block && first_block) {
691
702
  Slice referenced_key;
692
703
  if (does_referenced_key_exist) {
693
704
  referenced_key = biter->key();
694
705
  } else {
695
706
  referenced_key = key;
696
707
  }
697
- BlockCacheTraceRecord access_record(
698
- rep_->ioptions.clock->NowMicros(),
699
- /*_block_key=*/"", lookup_data_block_context.block_type,
700
- lookup_data_block_context.block_size, rep_->cf_id_for_tracing(),
701
- /*_cf_name=*/"", rep_->level_for_tracing(),
702
- rep_->sst_number_for_tracing(), lookup_data_block_context.caller,
703
- lookup_data_block_context.is_cache_hit,
704
- lookup_data_block_context.no_insert,
705
- lookup_data_block_context.get_id,
706
- lookup_data_block_context.get_from_user_specified_snapshot,
707
- /*_referenced_key=*/"", referenced_data_size,
708
- lookup_data_block_context.num_keys_in_block,
709
- does_referenced_key_exist);
710
- // TODO: Should handle status here?
711
- block_cache_tracer_
712
- ->WriteBlockAccess(access_record,
713
- lookup_data_block_context.block_key,
714
- rep_->cf_name_for_tracing(), referenced_key)
715
- .PermitUncheckedError();
708
+
709
+ // block_key is self-assigned here (previously assigned from
710
+ // cache_keys / async_handles, now out of scope)
711
+ SaveLookupContextOrTraceRecord(lookup_data_block_context->block_key,
712
+ /*is_cache_hit=*/!handle_present,
713
+ read_options, parsed_block_value,
714
+ lookup_data_block_context);
715
+ FinishTraceRecord(
716
+ *lookup_data_block_context, lookup_data_block_context->block_key,
717
+ referenced_key, does_referenced_key_exist, referenced_data_size);
716
718
  }
717
719
  s = biter->status();
718
720
  if (done) {
@@ -730,7 +732,12 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
730
732
  } while (iiter->Valid());
731
733
 
732
734
  if (matched && filter != nullptr) {
733
- RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE);
735
+ if (rep_->whole_key_filtering) {
736
+ RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE);
737
+ } else {
738
+ RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_TRUE_POSITIVE);
739
+ }
740
+ // Includes prefix stats
734
741
  PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1,
735
742
  rep_->level);
736
743
  }
@@ -116,8 +116,9 @@ class BlockBasedTableReaderBaseTest : public testing::Test {
116
116
  bool prefetch_index_and_filter_in_cache = true,
117
117
  Status* status = nullptr) {
118
118
  const MutableCFOptions moptions(options_);
119
- TableReaderOptions table_reader_options = TableReaderOptions(
120
- ioptions, moptions.prefix_extractor, EnvOptions(), comparator);
119
+ TableReaderOptions table_reader_options =
120
+ TableReaderOptions(ioptions, moptions.prefix_extractor, EnvOptions(),
121
+ comparator, 0 /* block_protection_bytes_per_key */);
121
122
 
122
123
  std::unique_ptr<RandomAccessFileReader> file;
123
124
  NewFileReader(table_name, foptions, &file);
@@ -499,6 +500,7 @@ TEST_P(BlockBasedTableReaderTestVerifyChecksum, ChecksumMismatch) {
499
500
 
500
501
  std::unique_ptr<BlockBasedTable> table;
501
502
  Options options;
503
+ options.statistics = CreateDBStatistics();
502
504
  ImmutableOptions ioptions(options);
503
505
  FileOptions foptions;
504
506
  foptions.use_direct_reads = use_direct_reads_;
@@ -528,8 +530,12 @@ TEST_P(BlockBasedTableReaderTestVerifyChecksum, ChecksumMismatch) {
528
530
  static_cast<int>(handle.offset()), 128));
529
531
 
530
532
  NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table);
533
+ ASSERT_EQ(0,
534
+ options.statistics->getTickerCount(BLOCK_CHECKSUM_MISMATCH_COUNT));
531
535
  Status s = table->VerifyChecksum(ReadOptions(),
532
536
  TableReaderCaller::kUserVerifyChecksum);
537
+ ASSERT_EQ(1,
538
+ options.statistics->getTickerCount(BLOCK_CHECKSUM_MISMATCH_COUNT));
533
539
  ASSERT_EQ(s.code(), Status::kCorruption);
534
540
  }
535
541
 
@@ -37,7 +37,8 @@ class BlockBuilder {
37
37
  void SwapAndReset(std::string& buffer);
38
38
 
39
39
  // REQUIRES: Finish() has not been called since the last call to Reset().
40
- // REQUIRES: key is larger than any previously added key
40
+ // REQUIRES: Unless a range tombstone block, key is larger than any previously
41
+ // added key
41
42
  // DO NOT mix with AddWithLastKey() between Resets. For efficiency, use
42
43
  // AddWithLastKey() in contexts where previous added key is already known
43
44
  // and delta encoding might be used.
@@ -47,7 +48,8 @@ class BlockBuilder {
47
48
  // A faster version of Add() if the previous key is already known for all
48
49
  // Add()s.
49
50
  // REQUIRES: Finish() has not been called since the last call to Reset().
50
- // REQUIRES: key is larger than any previously added key
51
+ // REQUIRES: Unless a range tombstone block, key is larger than any previously
52
+ // added key
51
53
  // REQUIRES: if AddWithLastKey has been called since last Reset(), last_key
52
54
  // is the key from most recent AddWithLastKey. (For convenience, last_key
53
55
  // is ignored on first call after creation or Reset().)
@@ -11,17 +11,25 @@ void BlockCreateContext::Create(std::unique_ptr<Block_kData>* parsed_out,
11
11
  BlockContents&& block) {
12
12
  parsed_out->reset(new Block_kData(
13
13
  std::move(block), table_options->read_amp_bytes_per_bit, statistics));
14
+ parsed_out->get()->InitializeDataBlockProtectionInfo(protection_bytes_per_key,
15
+ raw_ucmp);
14
16
  }
15
17
  void BlockCreateContext::Create(std::unique_ptr<Block_kIndex>* parsed_out,
16
18
  BlockContents&& block) {
17
19
  parsed_out->reset(new Block_kIndex(std::move(block),
18
20
  /*read_amp_bytes_per_bit*/ 0, statistics));
21
+ parsed_out->get()->InitializeIndexBlockProtectionInfo(
22
+ protection_bytes_per_key, raw_ucmp, index_value_is_full,
23
+ index_has_first_key);
19
24
  }
20
25
  void BlockCreateContext::Create(
21
26
  std::unique_ptr<Block_kFilterPartitionIndex>* parsed_out,
22
27
  BlockContents&& block) {
23
28
  parsed_out->reset(new Block_kFilterPartitionIndex(
24
29
  std::move(block), /*read_amp_bytes_per_bit*/ 0, statistics));
30
+ parsed_out->get()->InitializeIndexBlockProtectionInfo(
31
+ protection_bytes_per_key, raw_ucmp, index_value_is_full,
32
+ index_has_first_key);
25
33
  }
26
34
  void BlockCreateContext::Create(
27
35
  std::unique_ptr<Block_kRangeDeletion>* parsed_out, BlockContents&& block) {
@@ -32,6 +40,8 @@ void BlockCreateContext::Create(std::unique_ptr<Block_kMetaIndex>* parsed_out,
32
40
  BlockContents&& block) {
33
41
  parsed_out->reset(new Block_kMetaIndex(
34
42
  std::move(block), /*read_amp_bytes_per_bit*/ 0, statistics));
43
+ parsed_out->get()->InitializeMetaIndexBlockProtectionInfo(
44
+ protection_bytes_per_key);
35
45
  }
36
46
 
37
47
  void BlockCreateContext::Create(
@@ -70,14 +70,26 @@ class Block_kMetaIndex : public Block {
70
70
  struct BlockCreateContext : public Cache::CreateContext {
71
71
  BlockCreateContext() {}
72
72
  BlockCreateContext(const BlockBasedTableOptions* _table_options,
73
- Statistics* _statistics, bool _using_zstd)
73
+ Statistics* _statistics, bool _using_zstd,
74
+ uint8_t _protection_bytes_per_key,
75
+ const Comparator* _raw_ucmp,
76
+ bool _index_value_is_full = false,
77
+ bool _index_has_first_key = false)
74
78
  : table_options(_table_options),
75
79
  statistics(_statistics),
76
- using_zstd(_using_zstd) {}
80
+ using_zstd(_using_zstd),
81
+ protection_bytes_per_key(_protection_bytes_per_key),
82
+ raw_ucmp(_raw_ucmp),
83
+ index_value_is_full(_index_value_is_full),
84
+ index_has_first_key(_index_has_first_key) {}
77
85
 
78
86
  const BlockBasedTableOptions* table_options = nullptr;
79
87
  Statistics* statistics = nullptr;
80
88
  bool using_zstd = false;
89
+ uint8_t protection_bytes_per_key = 0;
90
+ const Comparator* raw_ucmp = nullptr;
91
+ bool index_value_is_full;
92
+ bool index_has_first_key;
81
93
 
82
94
  // For TypedCacheInterface
83
95
  template <typename TBlocklike>