@nxtedition/rocksdb 5.2.21 → 5.2.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (909)
  1. package/binding.cc +216 -252
  2. package/binding.gyp +78 -72
  3. package/deps/rocksdb/build_version.cc +70 -4
  4. package/deps/rocksdb/rocksdb/CMakeLists.txt +281 -149
  5. package/deps/rocksdb/rocksdb/Makefile +459 -469
  6. package/deps/rocksdb/rocksdb/README.md +4 -4
  7. package/deps/rocksdb/rocksdb/TARGETS +5244 -1500
  8. package/deps/rocksdb/rocksdb/cache/cache.cc +12 -3
  9. package/deps/rocksdb/rocksdb/cache/cache_bench.cc +7 -368
  10. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +924 -0
  11. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +128 -0
  12. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.h +103 -0
  13. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +183 -0
  14. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +11 -0
  15. package/deps/rocksdb/rocksdb/cache/cache_key.cc +344 -0
  16. package/deps/rocksdb/rocksdb/cache/cache_key.h +132 -0
  17. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +183 -0
  18. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +288 -0
  19. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +468 -0
  20. package/deps/rocksdb/rocksdb/cache/cache_test.cc +85 -8
  21. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +121 -51
  22. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +171 -0
  23. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +86 -0
  24. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +607 -0
  25. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +381 -154
  26. package/deps/rocksdb/rocksdb/cache/lru_cache.h +176 -33
  27. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1659 -3
  28. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +94 -23
  29. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +49 -28
  30. package/deps/rocksdb/rocksdb/crash_test.mk +93 -0
  31. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +54 -31
  32. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +10 -6
  33. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +146 -0
  34. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator_test.cc +326 -0
  35. package/deps/rocksdb/rocksdb/db/blob/blob_fetcher.cc +34 -0
  36. package/deps/rocksdb/rocksdb/db/blob/blob_fetcher.h +37 -0
  37. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition.cc +4 -2
  38. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc +8 -4
  39. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +99 -40
  40. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +20 -8
  41. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +95 -83
  42. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +13 -10
  43. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +7 -4
  44. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +37 -37
  45. package/deps/rocksdb/rocksdb/db/blob/blob_file_completion_callback.h +101 -0
  46. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.cc +8 -1
  47. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.h +6 -0
  48. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +209 -44
  49. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +37 -11
  50. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +382 -179
  51. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter.cc +100 -0
  52. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter.h +102 -0
  53. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter_test.cc +196 -0
  54. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +3 -0
  55. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.h +2 -1
  56. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +7 -5
  57. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.h +10 -3
  58. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +12 -8
  59. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.h +5 -5
  60. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +772 -9
  61. package/deps/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc +730 -0
  62. package/deps/rocksdb/rocksdb/db/blob/db_blob_corruption_test.cc +82 -0
  63. package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +155 -17
  64. package/deps/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.cc +21 -0
  65. package/deps/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.h +38 -0
  66. package/deps/rocksdb/rocksdb/db/builder.cc +137 -89
  67. package/deps/rocksdb/rocksdb/db/builder.h +16 -37
  68. package/deps/rocksdb/rocksdb/db/c.cc +413 -208
  69. package/deps/rocksdb/rocksdb/db/c_test.c +227 -138
  70. package/deps/rocksdb/rocksdb/db/column_family.cc +118 -103
  71. package/deps/rocksdb/rocksdb/db/column_family.h +86 -44
  72. package/deps/rocksdb/rocksdb/db/column_family_test.cc +38 -24
  73. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +81 -0
  74. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +275 -0
  75. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator_test.cc +258 -0
  76. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +81 -28
  77. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +43 -12
  78. package/deps/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h +12 -0
  79. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +406 -215
  80. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +147 -50
  81. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +167 -61
  82. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +1321 -156
  83. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +197 -28
  84. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -3
  85. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +246 -43
  86. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +65 -26
  87. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +7 -7
  88. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +122 -9
  89. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -2
  90. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +18 -6
  91. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -1
  92. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +536 -44
  93. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +311 -30
  94. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +1 -1
  95. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +849 -0
  96. package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +92 -0
  97. package/deps/rocksdb/rocksdb/db/compaction/sst_partitioner.cc +46 -0
  98. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +1 -1
  99. package/deps/rocksdb/rocksdb/db/convenience.cc +6 -3
  100. package/deps/rocksdb/rocksdb/db/corruption_test.cc +383 -28
  101. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +7 -2
  102. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +154 -45
  103. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +1095 -33
  104. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +1249 -203
  105. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +135 -9
  106. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +1348 -166
  107. package/deps/rocksdb/rocksdb/db/db_dynamic_level_test.cc +3 -5
  108. package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +312 -45
  110. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +1734 -48
  111. package/deps/rocksdb/rocksdb/db/{compacted_db_impl.cc → db_impl/compacted_db_impl.cc} +24 -7
  112. package/deps/rocksdb/rocksdb/db/{compacted_db_impl.h → db_impl/compacted_db_impl.h} +1 -1
  113. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +644 -333
  114. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +365 -92
  115. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +578 -210
  116. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +38 -16
  117. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +17 -10
  118. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +75 -74
  119. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +450 -183
  120. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +42 -9
  121. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +232 -15
  122. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +42 -4
  123. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +297 -100
  124. package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +16 -15
  125. package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +31 -1
  126. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +6 -5
  127. package/deps/rocksdb/rocksdb/db/db_iter.cc +218 -153
  128. package/deps/rocksdb/rocksdb/db/db_iter.h +14 -12
  129. package/deps/rocksdb/rocksdb/db/db_iter_stress_test.cc +1 -1
  130. package/deps/rocksdb/rocksdb/db/db_iter_test.cc +84 -160
  131. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +47 -6
  132. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +204 -0
  133. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +21 -13
  134. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +17 -10
  135. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +38 -24
  136. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +184 -19
  137. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +1 -1
  138. package/deps/rocksdb/rocksdb/db/db_options_test.cc +183 -3
  139. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +409 -9
  140. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +92 -23
  141. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +446 -0
  142. package/deps/rocksdb/rocksdb/db/{db_impl/db_secondary_test.cc → db_secondary_test.cc} +363 -35
  143. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +520 -15
  144. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +50 -1
  145. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +139 -4
  146. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +1 -1
  147. package/deps/rocksdb/rocksdb/db/db_test.cc +669 -359
  148. package/deps/rocksdb/rocksdb/db/db_test2.cc +2110 -304
  149. package/deps/rocksdb/rocksdb/db/db_test_util.cc +76 -43
  150. package/deps/rocksdb/rocksdb/db/db_test_util.h +231 -103
  151. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +19 -11
  152. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +490 -71
  153. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +980 -349
  154. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +11 -12
  155. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +793 -0
  156. package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -1
  157. package/deps/rocksdb/rocksdb/db/dbformat.cc +4 -12
  158. package/deps/rocksdb/rocksdb/db/dbformat.h +28 -18
  159. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +3 -0
  160. package/deps/rocksdb/rocksdb/db/deletefile_test.cc +50 -15
  161. package/deps/rocksdb/rocksdb/db/error_handler.cc +127 -41
  162. package/deps/rocksdb/rocksdb/db/error_handler.h +12 -5
  163. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +524 -255
  164. package/deps/rocksdb/rocksdb/db/event_helpers.cc +136 -11
  165. package/deps/rocksdb/rocksdb/db/event_helpers.h +27 -2
  166. package/deps/rocksdb/rocksdb/db/experimental.cc +100 -0
  167. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +307 -4
  168. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +137 -60
  169. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +12 -8
  170. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -55
  171. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +86 -5
  172. package/deps/rocksdb/rocksdb/db/filename_test.cc +63 -0
  173. package/deps/rocksdb/rocksdb/db/flush_job.cc +619 -64
  174. package/deps/rocksdb/rocksdb/db/flush_job.h +30 -7
  175. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +33 -16
  176. package/deps/rocksdb/rocksdb/db/flush_scheduler.h +2 -1
  177. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +18 -17
  178. package/deps/rocksdb/rocksdb/db/forward_iterator.h +5 -4
  179. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +0 -1
  180. package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +91 -0
  181. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +25 -14
  182. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +6 -5
  183. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +1 -1
  184. package/deps/rocksdb/rocksdb/db/internal_stats.cc +471 -50
  185. package/deps/rocksdb/rocksdb/db/internal_stats.h +129 -25
  186. package/deps/rocksdb/rocksdb/db/job_context.h +22 -9
  187. package/deps/rocksdb/rocksdb/db/kv_checksum.h +394 -0
  188. package/deps/rocksdb/rocksdb/db/listener_test.cc +518 -41
  189. package/deps/rocksdb/rocksdb/db/log_format.h +4 -1
  190. package/deps/rocksdb/rocksdb/db/log_reader.cc +129 -6
  191. package/deps/rocksdb/rocksdb/db/log_reader.h +17 -1
  192. package/deps/rocksdb/rocksdb/db/log_test.cc +161 -11
  193. package/deps/rocksdb/rocksdb/db/log_writer.cc +92 -13
  194. package/deps/rocksdb/rocksdb/db/log_writer.h +18 -5
  195. package/deps/rocksdb/rocksdb/db/logs_with_prep_tracker.h +1 -1
  196. package/deps/rocksdb/rocksdb/db/lookup_key.h +0 -1
  197. package/deps/rocksdb/rocksdb/db/malloc_stats.cc +2 -2
  198. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +21 -8
  199. package/deps/rocksdb/rocksdb/db/memtable.cc +144 -54
  200. package/deps/rocksdb/rocksdb/db/memtable.h +72 -15
  201. package/deps/rocksdb/rocksdb/db/memtable_list.cc +95 -47
  202. package/deps/rocksdb/rocksdb/db/memtable_list.h +33 -13
  203. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +61 -31
  204. package/deps/rocksdb/rocksdb/db/merge_context.h +20 -8
  205. package/deps/rocksdb/rocksdb/db/merge_helper.cc +54 -11
  206. package/deps/rocksdb/rocksdb/db/merge_helper.h +17 -6
  207. package/deps/rocksdb/rocksdb/db/merge_helper_test.cc +13 -7
  208. package/deps/rocksdb/rocksdb/db/merge_test.cc +40 -19
  209. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +14 -25
  210. package/deps/rocksdb/rocksdb/db/output_validator.cc +3 -0
  211. package/deps/rocksdb/rocksdb/db/output_validator.h +5 -4
  212. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +32 -28
  213. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +43 -29
  214. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +9 -7
  215. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler_test.cc +21 -16
  216. package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +1 -1
  217. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +29 -36
  218. package/deps/rocksdb/rocksdb/db/pre_release_callback.h +1 -2
  219. package/deps/rocksdb/rocksdb/db/prefix_test.cc +4 -4
  220. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +2 -2
  221. package/deps/rocksdb/rocksdb/db/range_del_aggregator_bench.cc +11 -11
  222. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +3 -2
  223. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +14 -8
  224. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +17 -0
  225. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +4 -2
  226. package/deps/rocksdb/rocksdb/db/read_callback.h +1 -0
  227. package/deps/rocksdb/rocksdb/db/repair.cc +87 -58
  228. package/deps/rocksdb/rocksdb/db/repair_test.cc +35 -5
  229. package/deps/rocksdb/rocksdb/db/snapshot_impl.h +2 -1
  230. package/deps/rocksdb/rocksdb/db/table_cache.cc +95 -69
  231. package/deps/rocksdb/rocksdb/db/table_cache.h +63 -53
  232. package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +4 -4
  233. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +78 -10
  234. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +28 -33
  235. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +30 -51
  236. package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +12 -8
  237. package/deps/rocksdb/rocksdb/db/version_builder.cc +564 -341
  238. package/deps/rocksdb/rocksdb/db/version_builder.h +8 -8
  239. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +327 -155
  240. package/deps/rocksdb/rocksdb/db/version_edit.cc +89 -27
  241. package/deps/rocksdb/rocksdb/db/version_edit.h +42 -17
  242. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +324 -43
  243. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +79 -22
  244. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +165 -20
  245. package/deps/rocksdb/rocksdb/db/version_set.cc +935 -1034
  246. package/deps/rocksdb/rocksdb/db/version_set.h +183 -122
  247. package/deps/rocksdb/rocksdb/db/version_set_test.cc +556 -138
  248. package/deps/rocksdb/rocksdb/db/version_util.h +68 -0
  249. package/deps/rocksdb/rocksdb/db/wal_manager.cc +23 -21
  250. package/deps/rocksdb/rocksdb/db/wal_manager.h +5 -2
  251. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +30 -27
  252. package/deps/rocksdb/rocksdb/db/write_batch.cc +704 -209
  253. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +135 -2
  254. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +209 -5
  255. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +2 -0
  256. package/deps/rocksdb/rocksdb/db/write_controller.cc +47 -54
  257. package/deps/rocksdb/rocksdb/db/write_controller.h +12 -9
  258. package/deps/rocksdb/rocksdb/db/write_controller_test.cc +215 -103
  259. package/deps/rocksdb/rocksdb/db/write_thread.cc +11 -0
  260. package/deps/rocksdb/rocksdb/db/write_thread.h +14 -8
  261. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +7 -4
  262. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +10 -3
  263. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +6 -0
  264. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +1 -1
  265. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -2
  266. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +78 -25
  267. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +13 -2
  268. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +29 -12
  269. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +5 -1
  270. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +199 -32
  271. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.cc +188 -0
  272. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +59 -10
  273. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +77 -109
  274. package/deps/rocksdb/rocksdb/{third-party/folly/folly/synchronization/WaitOptions.cpp → db_stress_tool/db_stress_stat.cc} +9 -4
  275. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +7 -6
  276. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_table_properties_collector.h +1 -0
  277. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +699 -143
  278. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +20 -2
  279. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +49 -39
  280. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +631 -0
  281. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +287 -0
  282. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +1565 -0
  283. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +374 -0
  284. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +149 -18
  285. package/deps/rocksdb/rocksdb/env/composite_env.cc +464 -0
  286. package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +98 -646
  287. package/deps/rocksdb/rocksdb/env/emulated_clock.h +114 -0
  288. package/deps/rocksdb/rocksdb/env/env.cc +632 -42
  289. package/deps/rocksdb/rocksdb/env/env_basic_test.cc +84 -36
  290. package/deps/rocksdb/rocksdb/env/env_chroot.cc +88 -286
  291. package/deps/rocksdb/rocksdb/env/env_chroot.h +34 -1
  292. package/deps/rocksdb/rocksdb/env/env_encryption.cc +469 -277
  293. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +9 -30
  294. package/deps/rocksdb/rocksdb/env/env_posix.cc +110 -119
  295. package/deps/rocksdb/rocksdb/env/env_test.cc +1128 -39
  296. package/deps/rocksdb/rocksdb/env/file_system.cc +147 -8
  297. package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +207 -136
  298. package/deps/rocksdb/rocksdb/env/file_system_tracer.h +86 -54
  299. package/deps/rocksdb/rocksdb/env/fs_posix.cc +192 -64
  300. package/deps/rocksdb/rocksdb/env/fs_readonly.h +107 -0
  301. package/deps/rocksdb/rocksdb/env/fs_remap.cc +339 -0
  302. package/deps/rocksdb/rocksdb/env/fs_remap.h +139 -0
  303. package/deps/rocksdb/rocksdb/env/io_posix.cc +245 -41
  304. package/deps/rocksdb/rocksdb/env/io_posix.h +66 -1
  305. package/deps/rocksdb/rocksdb/env/mock_env.cc +147 -149
  306. package/deps/rocksdb/rocksdb/env/mock_env.h +113 -11
  307. package/deps/rocksdb/rocksdb/env/mock_env_test.cc +2 -4
  308. package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +164 -0
  309. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +71 -0
  310. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +9 -5
  311. package/deps/rocksdb/rocksdb/file/delete_scheduler.h +6 -4
  312. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +19 -12
  313. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +459 -70
  314. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +205 -28
  315. package/deps/rocksdb/rocksdb/file/file_util.cc +39 -28
  316. package/deps/rocksdb/rocksdb/file/file_util.h +18 -27
  317. package/deps/rocksdb/rocksdb/file/filename.cc +59 -22
  318. package/deps/rocksdb/rocksdb/file/filename.h +13 -8
  319. package/deps/rocksdb/rocksdb/file/line_file_reader.cc +68 -0
  320. package/deps/rocksdb/rocksdb/file/line_file_reader.h +59 -0
  321. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +1130 -6
  322. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +220 -36
  323. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +69 -17
  324. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +13 -12
  325. package/deps/rocksdb/rocksdb/file/read_write_util.cc +3 -38
  326. package/deps/rocksdb/rocksdb/file/read_write_util.h +0 -4
  327. package/deps/rocksdb/rocksdb/file/readahead_file_info.h +33 -0
  328. package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +57 -9
  329. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +58 -6
  330. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +29 -54
  331. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +22 -29
  332. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +424 -50
  333. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +66 -19
  334. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +157 -66
  335. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +224 -121
  336. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +333 -30
  337. package/deps/rocksdb/rocksdb/include/rocksdb/cache_bench_tool.h +14 -0
  338. package/deps/rocksdb/rocksdb/include/rocksdb/cleanable.h +1 -1
  339. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +90 -50
  340. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +13 -5
  341. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +20 -4
  342. package/deps/rocksdb/rocksdb/include/rocksdb/concurrent_task_limiter.h +8 -3
  343. package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +53 -12
  344. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +31 -6
  345. package/deps/rocksdb/rocksdb/include/rocksdb/customizable.h +102 -7
  346. package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +51 -0
  347. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +370 -262
  348. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +286 -87
  349. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +124 -64
  350. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +27 -0
  351. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +21 -4
  352. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +384 -41
  353. package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +111 -143
  354. package/deps/rocksdb/rocksdb/include/rocksdb/flush_block_policy.h +20 -6
  355. package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +56 -0
  356. package/deps/rocksdb/rocksdb/include/rocksdb/io_status.h +15 -33
  357. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +37 -1
  358. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -3
  359. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +314 -26
  360. package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +11 -7
  361. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +50 -15
  362. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +10 -3
  363. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +186 -96
  364. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +373 -103
  365. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +13 -3
  366. package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +2 -2
  367. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +37 -7
  368. package/deps/rocksdb/rocksdb/include/rocksdb/rocksdb_namespace.h +6 -0
  369. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +87 -0
  370. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +5 -12
  371. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +59 -30
  372. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +11 -11
  373. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +22 -0
  374. package/deps/rocksdb/rocksdb/include/rocksdb/sst_partitioner.h +17 -10
  375. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +121 -41
  376. package/deps/rocksdb/rocksdb/include/rocksdb/stats_history.h +1 -0
  377. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +114 -136
  378. package/deps/rocksdb/rocksdb/include/rocksdb/system_clock.h +116 -0
  379. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +160 -18
  380. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +57 -15
  381. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +3 -1
  382. package/deps/rocksdb/rocksdb/include/rocksdb/trace_reader_writer.h +10 -6
  383. package/deps/rocksdb/rocksdb/include/rocksdb/trace_record.h +247 -0
  384. package/deps/rocksdb/rocksdb/include/rocksdb/trace_record_result.h +187 -0
  385. package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +1 -1
  386. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +14 -24
  387. package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +46 -0
  388. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +14 -4
  389. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/agg_merge.h +138 -0
  390. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +631 -0
  391. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +142 -0
  392. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +12 -9
  393. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/customizable_util.h +368 -0
  394. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -0
  395. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h +4 -0
  396. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/object_registry.h +418 -63
  397. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +143 -73
  398. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +2 -2
  399. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/replayer.h +87 -0
  400. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/sim_cache.h +2 -2
  401. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +43 -5
  402. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +18 -23
  403. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +26 -0
  404. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +32 -6
  405. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db_mutex.h +1 -2
  406. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +20 -1
  407. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +30 -3
  408. package/deps/rocksdb/rocksdb/include/rocksdb/wal_filter.h +11 -2
  409. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +89 -11
  410. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +11 -0
  411. package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +108 -38
  412. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +40 -23
  413. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.h +12 -5
  414. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +100 -49
  415. package/deps/rocksdb/rocksdb/logging/env_logger.h +7 -5
  416. package/deps/rocksdb/rocksdb/logging/env_logger_test.cc +0 -1
  417. package/deps/rocksdb/rocksdb/logging/posix_logger.h +3 -9
  418. package/deps/rocksdb/rocksdb/memory/arena.cc +3 -1
  419. package/deps/rocksdb/rocksdb/memory/arena.h +1 -1
  420. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +171 -106
  421. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +31 -15
  422. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.cc +15 -4
  423. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.h +24 -8
  424. package/deps/rocksdb/rocksdb/memory/memory_allocator.cc +91 -0
  425. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +239 -0
  426. package/deps/rocksdb/rocksdb/memory/memory_usage.h +14 -1
  427. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.cc +72 -9
  428. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.cc +52 -6
  429. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +53 -0
  430. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +5 -5
  431. package/deps/rocksdb/rocksdb/memtable/memtablerep_bench.cc +17 -5
  432. package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -1
  433. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +87 -0
  434. package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +20 -10
  435. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +148 -94
  436. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +160 -62
  437. package/deps/rocksdb/rocksdb/microbench/CMakeLists.txt +17 -0
  438. package/deps/rocksdb/rocksdb/microbench/README.md +60 -0
  439. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +1360 -0
  440. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +153 -0
  441. package/deps/rocksdb/rocksdb/monitoring/histogram.cc +8 -15
  442. package/deps/rocksdb/rocksdb/monitoring/histogram.h +0 -1
  443. package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +18 -16
  444. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.cc +9 -7
  445. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.h +5 -3
  446. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +7 -5
  447. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +37 -12
  448. package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +26 -6
  449. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +6 -10
  450. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +14 -13
  451. package/deps/rocksdb/rocksdb/monitoring/perf_context_imp.h +19 -20
  452. package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +18 -18
  453. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +84 -2
  454. package/deps/rocksdb/rocksdb/monitoring/statistics.h +6 -0
  455. package/deps/rocksdb/rocksdb/monitoring/statistics_test.cc +47 -2
  456. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +67 -54
  457. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +4 -1
  458. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +2 -1
  459. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -2
  460. package/deps/rocksdb/rocksdb/options/cf_options.cc +280 -212
  461. package/deps/rocksdb/rocksdb/options/cf_options.h +51 -57
  462. package/deps/rocksdb/rocksdb/options/configurable.cc +242 -138
  463. package/deps/rocksdb/rocksdb/options/configurable_helper.h +4 -68
  464. package/deps/rocksdb/rocksdb/options/configurable_test.cc +144 -21
  465. package/deps/rocksdb/rocksdb/options/configurable_test.h +2 -3
  466. package/deps/rocksdb/rocksdb/options/customizable.cc +67 -7
  467. package/deps/rocksdb/rocksdb/options/customizable_test.cc +1773 -151
  468. package/deps/rocksdb/rocksdb/options/db_options.cc +275 -47
  469. package/deps/rocksdb/rocksdb/options/db_options.h +36 -7
  470. package/deps/rocksdb/rocksdb/options/options.cc +49 -17
  471. package/deps/rocksdb/rocksdb/options/options_helper.cc +369 -352
  472. package/deps/rocksdb/rocksdb/options/options_helper.h +23 -23
  473. package/deps/rocksdb/rocksdb/options/options_parser.cc +18 -13
  474. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +67 -54
  475. package/deps/rocksdb/rocksdb/options/options_test.cc +1162 -187
  476. package/deps/rocksdb/rocksdb/plugin/README.md +43 -0
  477. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +1 -1
  478. package/deps/rocksdb/rocksdb/port/lang.h +52 -0
  479. package/deps/rocksdb/rocksdb/port/port_example.h +1 -1
  480. package/deps/rocksdb/rocksdb/port/port_posix.cc +31 -2
  481. package/deps/rocksdb/rocksdb/port/port_posix.h +20 -2
  482. package/deps/rocksdb/rocksdb/port/stack_trace.cc +20 -4
  483. package/deps/rocksdb/rocksdb/port/sys_time.h +2 -2
  484. package/deps/rocksdb/rocksdb/port/win/env_default.cc +7 -7
  485. package/deps/rocksdb/rocksdb/port/win/env_win.cc +44 -74
  486. package/deps/rocksdb/rocksdb/port/win/env_win.h +25 -23
  487. package/deps/rocksdb/rocksdb/port/win/io_win.cc +32 -34
  488. package/deps/rocksdb/rocksdb/port/win/io_win.h +12 -6
  489. package/deps/rocksdb/rocksdb/port/win/port_win.cc +55 -35
  490. package/deps/rocksdb/rocksdb/port/win/port_win.h +22 -5
  491. package/deps/rocksdb/rocksdb/port/win/win_logger.cc +3 -3
  492. package/deps/rocksdb/rocksdb/port/win/win_logger.h +3 -5
  493. package/deps/rocksdb/rocksdb/port/win/win_thread.cc +7 -1
  494. package/deps/rocksdb/rocksdb/port/win/win_thread.h +12 -17
  495. package/deps/rocksdb/rocksdb/python.mk +9 -0
  496. package/deps/rocksdb/rocksdb/src.mk +82 -34
  497. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -4
  498. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +1 -1
  499. package/deps/rocksdb/rocksdb/table/block_based/block.cc +158 -80
  500. package/deps/rocksdb/rocksdb/table/block_based/block.h +64 -36
  501. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +23 -14
  502. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.h +13 -5
  503. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc +3 -218
  504. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +603 -328
  505. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +28 -22
  506. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +220 -82
  507. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +8 -2
  508. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +3 -4
  509. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +28 -4
  510. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +598 -492
  511. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +151 -96
  512. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +31 -58
  513. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +330 -92
  514. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +50 -19
  515. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +23 -0
  516. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +226 -0
  517. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +56 -22
  518. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +42 -4
  519. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +5 -2
  520. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +2 -0
  521. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +34 -20
  522. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +9 -10
  523. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +26 -3
  524. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +2 -1
  525. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +844 -202
  526. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +281 -81
  527. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +62 -2
  528. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.h +2 -3
  529. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -7
  530. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +22 -6
  531. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +28 -26
  532. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
  533. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +1 -2
  534. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +2 -1
  535. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +11 -4
  536. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.cc +2 -1
  537. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +2 -0
  538. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +68 -26
  539. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +44 -9
  540. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +12 -10
  541. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +3 -4
  542. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.h +23 -4
  543. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +44 -19
  544. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +5 -1
  545. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +16 -28
  546. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +7 -4
  547. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +2 -2
  548. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +77 -57
  549. package/deps/rocksdb/rocksdb/table/block_fetcher.h +23 -12
  550. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +43 -56
  551. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +8 -8
  552. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.h +2 -1
  553. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +52 -70
  554. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.cc +5 -8
  555. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +1 -1
  556. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +17 -11
  557. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +2 -3
  558. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +42 -51
  559. package/deps/rocksdb/rocksdb/table/format.cc +258 -104
  560. package/deps/rocksdb/rocksdb/table/format.h +120 -109
  561. package/deps/rocksdb/rocksdb/table/get_context.cc +97 -65
  562. package/deps/rocksdb/rocksdb/table/get_context.h +19 -12
  563. package/deps/rocksdb/rocksdb/table/internal_iterator.h +14 -0
  564. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +8 -0
  565. package/deps/rocksdb/rocksdb/table/merger_test.cc +3 -2
  566. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +11 -21
  567. package/deps/rocksdb/rocksdb/table/merging_iterator.h +3 -3
  568. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +176 -171
  569. package/deps/rocksdb/rocksdb/table/meta_blocks.h +47 -33
  570. package/deps/rocksdb/rocksdb/table/mock_table.cc +7 -9
  571. package/deps/rocksdb/rocksdb/table/mock_table.h +3 -2
  572. package/deps/rocksdb/rocksdb/table/multiget_context.h +15 -8
  573. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +22 -29
  574. package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +6 -3
  575. package/deps/rocksdb/rocksdb/table/plain/plain_table_bloom.h +5 -8
  576. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +29 -26
  577. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +12 -16
  578. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.cc +145 -69
  579. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +1 -1
  580. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.cc +7 -6
  581. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.h +3 -4
  582. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +3 -1
  583. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.h +1 -1
  584. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +13 -18
  585. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +4 -9
  586. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +55 -37
  587. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +10 -5
  588. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +11 -8
  589. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +222 -16
  590. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +106 -58
  591. package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +6 -5
  592. package/deps/rocksdb/rocksdb/table/table_builder.h +68 -44
  593. package/deps/rocksdb/rocksdb/table/table_factory.cc +37 -10
  594. package/deps/rocksdb/rocksdb/table/table_properties.cc +109 -54
  595. package/deps/rocksdb/rocksdb/table/table_properties_internal.h +4 -20
  596. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +33 -32
  597. package/deps/rocksdb/rocksdb/table/table_reader_caller.h +2 -0
  598. package/deps/rocksdb/rocksdb/table/table_test.cc +989 -326
  599. package/deps/rocksdb/rocksdb/table/two_level_iterator.cc +4 -0
  600. package/deps/rocksdb/rocksdb/table/unique_id.cc +166 -0
  601. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +59 -0
  602. package/deps/rocksdb/rocksdb/test_util/mock_time_env.cc +1 -1
  603. package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +13 -10
  604. package/deps/rocksdb/rocksdb/test_util/sync_point.cc +1 -2
  605. package/deps/rocksdb/rocksdb/test_util/sync_point.h +35 -16
  606. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.cc +32 -10
  607. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.h +31 -4
  608. package/deps/rocksdb/rocksdb/test_util/testharness.cc +53 -1
  609. package/deps/rocksdb/rocksdb/test_util/testharness.h +67 -3
  610. package/deps/rocksdb/rocksdb/test_util/testutil.cc +236 -66
  611. package/deps/rocksdb/rocksdb/test_util/testutil.h +63 -100
  612. package/deps/rocksdb/rocksdb/test_util/transaction_test_util.cc +12 -1
  613. package/deps/rocksdb/rocksdb/tools/blob_dump.cc +2 -2
  614. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.cc +6 -3
  615. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.h +1 -0
  616. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +9 -3
  617. package/deps/rocksdb/rocksdb/tools/db_bench.cc +1 -1
  618. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +1420 -611
  619. package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +11 -8
  620. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +11 -1
  621. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +4 -2
  622. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_tool.cc +46 -22
  623. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +655 -179
  624. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +58 -6
  625. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +472 -29
  626. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +23 -2
  627. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +2 -2
  628. package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.cc +246 -0
  629. package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.h +126 -0
  630. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +83 -29
  631. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +38 -17
  632. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +191 -55
  633. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +219 -296
  634. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.h +87 -53
  635. package/deps/rocksdb/rocksdb/tools/write_stress.cc +8 -7
  636. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +6 -5
  637. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +5 -4
  638. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer_test.cc +14 -9
  639. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.cc +134 -60
  640. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.h +49 -38
  641. package/deps/rocksdb/rocksdb/trace_replay/io_tracer_test.cc +152 -15
  642. package/deps/rocksdb/rocksdb/trace_replay/trace_record.cc +206 -0
  643. package/deps/rocksdb/rocksdb/trace_replay/trace_record_handler.cc +190 -0
  644. package/deps/rocksdb/rocksdb/trace_replay/trace_record_handler.h +46 -0
  645. package/deps/rocksdb/rocksdb/trace_replay/trace_record_result.cc +146 -0
  646. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +475 -344
  647. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.h +83 -95
  648. package/deps/rocksdb/rocksdb/util/autovector.h +38 -18
  649. package/deps/rocksdb/rocksdb/util/autovector_test.cc +1 -1
  650. package/deps/rocksdb/rocksdb/util/bloom_impl.h +4 -0
  651. package/deps/rocksdb/rocksdb/util/bloom_test.cc +276 -94
  652. package/deps/rocksdb/rocksdb/util/build_version.cc.in +81 -4
  653. package/deps/rocksdb/rocksdb/util/cast_util.h +22 -0
  654. package/deps/rocksdb/rocksdb/util/channel.h +2 -0
  655. package/deps/rocksdb/rocksdb/util/coding.h +1 -33
  656. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +8 -0
  657. package/deps/rocksdb/rocksdb/util/comparator.cc +163 -3
  658. package/deps/rocksdb/rocksdb/util/compression.cc +122 -0
  659. package/deps/rocksdb/rocksdb/util/compression.h +212 -7
  660. package/deps/rocksdb/rocksdb/util/compression_context_cache.cc +1 -3
  661. package/deps/rocksdb/rocksdb/util/crc32c.cc +165 -2
  662. package/deps/rocksdb/rocksdb/util/crc32c.h +6 -0
  663. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +14 -0
  664. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +3 -0
  665. package/deps/rocksdb/rocksdb/util/crc32c_test.cc +47 -0
  666. package/deps/rocksdb/rocksdb/util/defer.h +30 -1
  667. package/deps/rocksdb/rocksdb/util/defer_test.cc +11 -0
  668. package/deps/rocksdb/rocksdb/util/duplicate_detector.h +3 -1
  669. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +3 -3
  670. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +5 -4
  671. package/deps/rocksdb/rocksdb/util/fastrange.h +2 -0
  672. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +36 -0
  673. package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +3 -1
  674. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +512 -52
  675. package/deps/rocksdb/rocksdb/util/filter_bench.cc +65 -10
  676. package/deps/rocksdb/rocksdb/util/gflags_compat.h +6 -1
  677. package/deps/rocksdb/rocksdb/util/hash.cc +121 -3
  678. package/deps/rocksdb/rocksdb/util/hash.h +31 -1
  679. package/deps/rocksdb/rocksdb/util/hash128.h +26 -0
  680. package/deps/rocksdb/rocksdb/util/hash_containers.h +51 -0
  681. package/deps/rocksdb/rocksdb/util/hash_test.cc +194 -2
  682. package/deps/rocksdb/rocksdb/util/heap.h +6 -1
  683. package/deps/rocksdb/rocksdb/util/kv_map.h +1 -1
  684. package/deps/rocksdb/rocksdb/util/log_write_bench.cc +8 -6
  685. package/deps/rocksdb/rocksdb/util/math.h +74 -7
  686. package/deps/rocksdb/rocksdb/util/math128.h +13 -1
  687. package/deps/rocksdb/rocksdb/util/murmurhash.h +3 -3
  688. package/deps/rocksdb/rocksdb/util/random.cc +9 -0
  689. package/deps/rocksdb/rocksdb/util/random.h +6 -0
  690. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +298 -144
  691. package/deps/rocksdb/rocksdb/util/rate_limiter.h +68 -19
  692. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +335 -23
  693. package/deps/rocksdb/rocksdb/util/repeatable_thread.h +10 -12
  694. package/deps/rocksdb/rocksdb/util/repeatable_thread_test.cc +18 -15
  695. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +98 -74
  696. package/deps/rocksdb/rocksdb/util/ribbon_config.cc +506 -0
  697. package/deps/rocksdb/rocksdb/util/ribbon_config.h +182 -0
  698. package/deps/rocksdb/rocksdb/util/ribbon_impl.h +154 -79
  699. package/deps/rocksdb/rocksdb/util/ribbon_test.cc +742 -365
  700. package/deps/rocksdb/rocksdb/util/set_comparator.h +2 -0
  701. package/deps/rocksdb/rocksdb/util/slice.cc +198 -35
  702. package/deps/rocksdb/rocksdb/util/slice_test.cc +30 -1
  703. package/deps/rocksdb/rocksdb/util/status.cc +32 -29
  704. package/deps/rocksdb/rocksdb/util/stop_watch.h +18 -18
  705. package/deps/rocksdb/rocksdb/util/string_util.cc +85 -6
  706. package/deps/rocksdb/rocksdb/util/string_util.h +47 -2
  707. package/deps/rocksdb/rocksdb/util/thread_guard.h +41 -0
  708. package/deps/rocksdb/rocksdb/util/thread_local.h +2 -2
  709. package/deps/rocksdb/rocksdb/util/thread_local_test.cc +22 -24
  710. package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +7 -6
  711. package/deps/rocksdb/rocksdb/util/timer.h +55 -46
  712. package/deps/rocksdb/rocksdb/util/timer_test.cc +50 -48
  713. package/deps/rocksdb/rocksdb/util/user_comparator_wrapper.h +4 -0
  714. package/deps/rocksdb/rocksdb/util/vector_iterator.h +31 -15
  715. package/deps/rocksdb/rocksdb/util/work_queue.h +2 -0
  716. package/deps/rocksdb/rocksdb/util/xxhash.cc +35 -1144
  717. package/deps/rocksdb/rocksdb/util/xxhash.h +5117 -373
  718. package/deps/rocksdb/rocksdb/util/xxph3.h +1762 -0
  719. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.cc +238 -0
  720. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.h +49 -0
  721. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +134 -0
  722. package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.cc +104 -0
  723. package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.h +47 -0
  724. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +3164 -0
  725. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_impl.h +29 -0
  726. package/deps/rocksdb/rocksdb/utilities/{backupable/backupable_db_test.cc → backup/backup_engine_test.cc} +1679 -485
  727. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +6 -4
  728. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +14 -9
  729. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +2 -0
  730. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +1 -0
  731. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_gc_stats.h +4 -0
  732. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +37 -27
  733. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +8 -4
  734. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +1 -1
  735. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_iterator.h +13 -10
  736. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +5 -0
  737. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +44 -25
  738. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +3 -4
  739. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +27 -19
  740. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +4 -2
  741. package/deps/rocksdb/rocksdb/utilities/cache_dump_load.cc +69 -0
  742. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +489 -0
  743. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +366 -0
  744. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.cc +67 -4
  745. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.h +21 -6
  746. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +107 -7
  747. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_options.h +43 -0
  748. package/deps/rocksdb/rocksdb/utilities/cassandra/format.h +1 -1
  749. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.cc +24 -8
  750. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.h +7 -7
  751. package/deps/rocksdb/rocksdb/utilities/cassandra/serialize.h +5 -0
  752. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +99 -218
  753. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +8 -24
  754. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +114 -1
  755. package/deps/rocksdb/rocksdb/utilities/compaction_filters/layered_compaction_filter_base.h +6 -2
  756. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc +0 -4
  757. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h +7 -6
  758. package/deps/rocksdb/rocksdb/utilities/compaction_filters.cc +56 -0
  759. package/deps/rocksdb/rocksdb/utilities/convenience/info_log_finder.cc +2 -2
  760. package/deps/rocksdb/rocksdb/utilities/counted_fs.cc +355 -0
  761. package/deps/rocksdb/rocksdb/utilities/counted_fs.h +152 -0
  762. package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +13 -0
  763. package/deps/rocksdb/rocksdb/utilities/env_timed.cc +164 -122
  764. package/deps/rocksdb/rocksdb/utilities/env_timed.h +97 -0
  765. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +75 -17
  766. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +19 -3
  767. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +539 -126
  768. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +162 -17
  769. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +110 -0
  770. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +94 -0
  771. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +5 -2
  772. package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +104 -0
  773. package/deps/rocksdb/rocksdb/utilities/merge_operators/bytesxor.h +5 -3
  774. package/deps/rocksdb/rocksdb/utilities/merge_operators/max.cc +4 -1
  775. package/deps/rocksdb/rocksdb/utilities/merge_operators/put.cc +11 -3
  776. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.cc +0 -2
  777. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.h +5 -1
  778. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.cc +29 -10
  779. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.h +6 -3
  780. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.cc +29 -14
  781. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.h +6 -3
  782. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +71 -18
  783. package/deps/rocksdb/rocksdb/utilities/merge_operators/uint64add.cc +15 -9
  784. package/deps/rocksdb/rocksdb/utilities/merge_operators.cc +120 -0
  785. package/deps/rocksdb/rocksdb/utilities/merge_operators.h +3 -23
  786. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +267 -42
  787. package/deps/rocksdb/rocksdb/utilities/object_registry_test.cc +702 -76
  788. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -1
  789. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +26 -5
  790. package/deps/rocksdb/rocksdb/utilities/options/options_util.cc +1 -1
  791. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +124 -1
  792. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.cc +2 -3
  793. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.h +8 -9
  794. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +15 -13
  795. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +1 -1
  796. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.h +4 -4
  797. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_evictable.h +2 -2
  798. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_bench.cc +8 -9
  799. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -1
  800. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.h +6 -3
  801. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.h +2 -2
  802. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +3 -0
  803. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator_test.cc +2 -0
  804. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +43 -35
  805. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +20 -18
  806. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +107 -2
  807. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +23 -15
  808. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.h +2 -2
  809. package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.cc +316 -0
  810. package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.h +86 -0
  811. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +4 -5
  812. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +4 -3
  813. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
  814. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +119 -3
  815. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc +20 -3
  816. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h +20 -0
  817. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h +3 -2
  818. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +4 -0
  819. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc +38 -14
  820. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h +17 -10
  821. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +1 -0
  822. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +1 -2
  823. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +423 -34
  824. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +82 -2
  825. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +72 -40
  826. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +32 -1
  827. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +13 -5
  828. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +7 -3
  829. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +207 -43
  830. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +50 -7
  831. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +28 -10
  832. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +11 -6
  833. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +516 -0
  834. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +506 -15
  835. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +27 -13
  836. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +14 -14
  837. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +3 -0
  838. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +2 -2
  839. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +14 -5
  840. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +305 -27
  841. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +55 -159
  842. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +209 -2
  843. package/deps/rocksdb/rocksdb/utilities/wal_filter.cc +23 -0
  844. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +157 -88
  845. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +501 -114
  846. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +91 -316
  847. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +1212 -672
  848. package/deps/rocksdb/rocksdb.gyp +425 -446
  849. package/package.json +8 -8
  850. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  851. package/prebuilds/darwin-x86/node.napi.node +0 -0
  852. package/prebuilds/{darwin-x64+arm64 → linux-x64}/node.napi.node +0 -0
  853. package/deps/rocksdb/rocksdb/env/env_hdfs.cc +0 -648
  854. package/deps/rocksdb/rocksdb/hdfs/README +0 -23
  855. package/deps/rocksdb/rocksdb/hdfs/env_hdfs.h +0 -386
  856. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backupable_db.h +0 -535
  857. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_librados.h +0 -175
  858. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/utility_db.h +0 -34
  859. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator_test.cc +0 -102
  860. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.h +0 -49
  861. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.h +0 -44
  862. package/deps/rocksdb/rocksdb/options/customizable_helper.h +0 -216
  863. package/deps/rocksdb/rocksdb/third-party/folly/folly/CPortability.h +0 -27
  864. package/deps/rocksdb/rocksdb/third-party/folly/folly/ConstexprMath.h +0 -45
  865. package/deps/rocksdb/rocksdb/third-party/folly/folly/Indestructible.h +0 -166
  866. package/deps/rocksdb/rocksdb/third-party/folly/folly/Optional.h +0 -570
  867. package/deps/rocksdb/rocksdb/third-party/folly/folly/Portability.h +0 -92
  868. package/deps/rocksdb/rocksdb/third-party/folly/folly/ScopeGuard.h +0 -54
  869. package/deps/rocksdb/rocksdb/third-party/folly/folly/Traits.h +0 -152
  870. package/deps/rocksdb/rocksdb/third-party/folly/folly/Unit.h +0 -59
  871. package/deps/rocksdb/rocksdb/third-party/folly/folly/Utility.h +0 -141
  872. package/deps/rocksdb/rocksdb/third-party/folly/folly/chrono/Hardware.h +0 -33
  873. package/deps/rocksdb/rocksdb/third-party/folly/folly/container/Array.h +0 -74
  874. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex-inl.h +0 -117
  875. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.cpp +0 -263
  876. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.h +0 -96
  877. package/deps/rocksdb/rocksdb/third-party/folly/folly/functional/Invoke.h +0 -40
  878. package/deps/rocksdb/rocksdb/third-party/folly/folly/hash/Hash.h +0 -29
  879. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Align.h +0 -144
  880. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Bits.h +0 -30
  881. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Launder.h +0 -51
  882. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/Asm.h +0 -28
  883. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysSyscall.h +0 -10
  884. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysTypes.h +0 -26
  885. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification-inl.h +0 -138
  886. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.cpp +0 -23
  887. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.h +0 -57
  888. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil-inl.h +0 -260
  889. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil.h +0 -52
  890. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/Baton.h +0 -328
  891. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex-inl.h +0 -1703
  892. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.cpp +0 -16
  893. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.h +0 -304
  894. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutexSpecializations.h +0 -39
  895. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.cpp +0 -26
  896. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.h +0 -318
  897. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.h +0 -57
  898. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/InlineFunctionRef.h +0 -219
  899. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable-inl.h +0 -207
  900. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable.h +0 -164
  901. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Sleeper.h +0 -57
  902. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Spin.h +0 -77
  903. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/test/DistributedMutexTest.cpp +0 -1145
  904. package/deps/rocksdb/rocksdb/util/build_version.h +0 -15
  905. package/deps/rocksdb/rocksdb/util/xxh3p.h +0 -1392
  906. package/deps/rocksdb/rocksdb/utilities/backupable/backupable_db.cc +0 -2354
  907. package/deps/rocksdb/rocksdb/utilities/env_librados.cc +0 -1497
  908. package/deps/rocksdb/rocksdb/utilities/env_librados_test.cc +0 -1146
  909. package/prebuilds/linux-x64/node.napi.glibc.node +0 -0
@@ -11,36 +11,48 @@
11
11
  #include <algorithm>
12
12
  #include <array>
13
13
  #include <limits>
14
+ #include <memory>
14
15
  #include <string>
16
+ #include <unordered_set>
15
17
  #include <utility>
16
18
  #include <vector>
17
19
 
20
+ #include "cache/cache_entry_roles.h"
21
+ #include "cache/cache_key.h"
18
22
  #include "cache/sharded_cache.h"
19
-
23
+ #include "db/compaction/compaction_picker.h"
20
24
  #include "db/dbformat.h"
21
25
  #include "db/pinned_iterators_manager.h"
22
26
  #include "file/file_prefetch_buffer.h"
23
27
  #include "file/file_util.h"
24
28
  #include "file/random_access_file_reader.h"
29
+ #include "logging/logging.h"
25
30
  #include "monitoring/perf_context_imp.h"
26
- #include "options/options_helper.h"
31
+ #include "port/lang.h"
27
32
  #include "rocksdb/cache.h"
28
33
  #include "rocksdb/comparator.h"
34
+ #include "rocksdb/convenience.h"
29
35
  #include "rocksdb/env.h"
30
36
  #include "rocksdb/file_system.h"
31
37
  #include "rocksdb/filter_policy.h"
32
38
  #include "rocksdb/iterator.h"
33
39
  #include "rocksdb/options.h"
40
+ #include "rocksdb/snapshot.h"
34
41
  #include "rocksdb/statistics.h"
42
+ #include "rocksdb/system_clock.h"
35
43
  #include "rocksdb/table.h"
36
44
  #include "rocksdb/table_properties.h"
45
+ #include "rocksdb/trace_record.h"
37
46
  #include "table/block_based/binary_search_index_reader.h"
38
47
  #include "table/block_based/block.h"
39
48
  #include "table/block_based/block_based_filter_block.h"
40
49
  #include "table/block_based/block_based_table_factory.h"
41
50
  #include "table/block_based/block_based_table_iterator.h"
51
+ #include "table/block_based/block_like_traits.h"
42
52
  #include "table/block_based/block_prefix_index.h"
53
+ #include "table/block_based/block_type.h"
43
54
  #include "table/block_based/filter_block.h"
55
+ #include "table/block_based/filter_policy_internal.h"
44
56
  #include "table/block_based/full_filter_block.h"
45
57
  #include "table/block_based/hash_index_reader.h"
46
58
  #include "table/block_based/partitioned_filter_block.h"
@@ -52,11 +64,9 @@
52
64
  #include "table/meta_blocks.h"
53
65
  #include "table/multiget_context.h"
54
66
  #include "table/persistent_cache_helper.h"
67
+ #include "table/persistent_cache_options.h"
55
68
  #include "table/sst_file_writer_collectors.h"
56
69
  #include "table/two_level_iterator.h"
57
-
58
- #include "monitoring/perf_context_imp.h"
59
- #include "port/lang.h"
60
70
  #include "test_util/sync_point.h"
61
71
  #include "util/coding.h"
62
72
  #include "util/crc32c.h"
@@ -69,83 +79,10 @@ extern const uint64_t kBlockBasedTableMagicNumber;
69
79
  extern const std::string kHashIndexPrefixesBlock;
70
80
  extern const std::string kHashIndexPrefixesMetadataBlock;
71
81
 
72
-
73
- // Found that 256 KB readahead size provides the best performance, based on
74
- // experiments, for auto readahead. Experiment data is in PR #3282.
75
- const size_t BlockBasedTable::kMaxAutoReadaheadSize = 256 * 1024;
76
-
77
82
  BlockBasedTable::~BlockBasedTable() {
78
83
  delete rep_;
79
84
  }
80
85
 
81
- std::atomic<uint64_t> BlockBasedTable::next_cache_key_id_(0);
82
-
83
- template <typename TBlocklike>
84
- class BlocklikeTraits;
85
-
86
- template <>
87
- class BlocklikeTraits<BlockContents> {
88
- public:
89
- static BlockContents* Create(BlockContents&& contents,
90
- size_t /* read_amp_bytes_per_bit */,
91
- Statistics* /* statistics */,
92
- bool /* using_zstd */,
93
- const FilterPolicy* /* filter_policy */) {
94
- return new BlockContents(std::move(contents));
95
- }
96
-
97
- static uint32_t GetNumRestarts(const BlockContents& /* contents */) {
98
- return 0;
99
- }
100
- };
101
-
102
- template <>
103
- class BlocklikeTraits<ParsedFullFilterBlock> {
104
- public:
105
- static ParsedFullFilterBlock* Create(BlockContents&& contents,
106
- size_t /* read_amp_bytes_per_bit */,
107
- Statistics* /* statistics */,
108
- bool /* using_zstd */,
109
- const FilterPolicy* filter_policy) {
110
- return new ParsedFullFilterBlock(filter_policy, std::move(contents));
111
- }
112
-
113
- static uint32_t GetNumRestarts(const ParsedFullFilterBlock& /* block */) {
114
- return 0;
115
- }
116
- };
117
-
118
- template <>
119
- class BlocklikeTraits<Block> {
120
- public:
121
- static Block* Create(BlockContents&& contents, size_t read_amp_bytes_per_bit,
122
- Statistics* statistics, bool /* using_zstd */,
123
- const FilterPolicy* /* filter_policy */) {
124
- return new Block(std::move(contents), read_amp_bytes_per_bit, statistics);
125
- }
126
-
127
- static uint32_t GetNumRestarts(const Block& block) {
128
- return block.NumRestarts();
129
- }
130
- };
131
-
132
- template <>
133
- class BlocklikeTraits<UncompressionDict> {
134
- public:
135
- static UncompressionDict* Create(BlockContents&& contents,
136
- size_t /* read_amp_bytes_per_bit */,
137
- Statistics* /* statistics */,
138
- bool using_zstd,
139
- const FilterPolicy* /* filter_policy */) {
140
- return new UncompressionDict(contents.data, std::move(contents.allocation),
141
- using_zstd);
142
- }
143
-
144
- static uint32_t GetNumRestarts(const UncompressionDict& /* dict */) {
145
- return 0;
146
- }
147
- };
148
-
149
86
  namespace {
150
87
  // Read the block identified by "handle" from "file".
151
88
  // The only relevant option is options.verify_checksums for now.
@@ -157,7 +94,7 @@ template <typename TBlocklike>
157
94
  Status ReadBlockFromFile(
158
95
  RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer,
159
96
  const Footer& footer, const ReadOptions& options, const BlockHandle& handle,
160
- std::unique_ptr<TBlocklike>* result, const ImmutableCFOptions& ioptions,
97
+ std::unique_ptr<TBlocklike>* result, const ImmutableOptions& ioptions,
161
98
  bool do_uncompress, bool maybe_compressed, BlockType block_type,
162
99
  const UncompressionDict& uncompression_dict,
163
100
  const PersistentCacheOptions& cache_options, size_t read_amp_bytes_per_bit,
@@ -173,33 +110,26 @@ Status ReadBlockFromFile(
173
110
  Status s = block_fetcher.ReadBlockContents();
174
111
  if (s.ok()) {
175
112
  result->reset(BlocklikeTraits<TBlocklike>::Create(
176
- std::move(contents), read_amp_bytes_per_bit, ioptions.statistics,
177
- using_zstd, filter_policy));
113
+ std::move(contents), read_amp_bytes_per_bit, ioptions.stats, using_zstd,
114
+ filter_policy));
178
115
  }
179
116
 
180
117
  return s;
181
118
  }
182
119
 
183
- // Delete the entry resided in the cache.
184
- template <class Entry>
185
- void DeleteCachedEntry(const Slice& /*key*/, void* value) {
186
- auto entry = reinterpret_cast<Entry*>(value);
187
- delete entry;
188
- }
189
-
190
120
  // Release the cached entry and decrement its ref count.
191
121
  // Do not force erase
192
122
  void ReleaseCachedEntry(void* arg, void* h) {
193
123
  Cache* cache = reinterpret_cast<Cache*>(arg);
194
124
  Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
195
- cache->Release(handle, false /* force_erase */);
125
+ cache->Release(handle, false /* erase_if_last_ref */);
196
126
  }
197
127
 
198
- // For hash based index, return true if prefix_extractor and
199
- // prefix_extractor_block mismatch, false otherwise. This flag will be used
200
- // as total_order_seek via NewIndexIterator
201
- bool PrefixExtractorChanged(const TableProperties* table_properties,
202
- const SliceTransform* prefix_extractor) {
128
+ // For hash based index, return false if table_properties->prefix_extractor_name
129
+ // and prefix_extractor both exist and match, otherwise true.
130
+ inline bool PrefixExtractorChangedHelper(
131
+ const TableProperties* table_properties,
132
+ const SliceTransform* prefix_extractor) {
203
133
  // BlockBasedTableOptions::kHashSearch requires prefix_extractor to be set.
204
134
  // Turn off hash index in prefix_extractor is not set; if prefix_extractor
205
135
  // is set but prefix_extractor_block is not set, also disable hash index
@@ -209,8 +139,7 @@ bool PrefixExtractorChanged(const TableProperties* table_properties,
209
139
  }
210
140
 
211
141
  // prefix_extractor and prefix_extractor_block are both non-empty
212
- if (table_properties->prefix_extractor_name.compare(
213
- prefix_extractor->Name()) != 0) {
142
+ if (table_properties->prefix_extractor_name != prefix_extractor->AsString()) {
214
143
  return true;
215
144
  } else {
216
145
  return false;
@@ -228,7 +157,7 @@ CacheAllocationPtr CopyBufferToHeap(MemoryAllocator* allocator, Slice& buf) {
228
157
  void BlockBasedTable::UpdateCacheHitMetrics(BlockType block_type,
229
158
  GetContext* get_context,
230
159
  size_t usage) const {
231
- Statistics* const statistics = rep_->ioptions.statistics;
160
+ Statistics* const statistics = rep_->ioptions.stats;
232
161
 
233
162
  PERF_COUNTER_ADD(block_cache_hit_count, 1);
234
163
  PERF_COUNTER_BY_LEVEL_ADD(block_cache_hit_count, 1,
@@ -286,7 +215,7 @@ void BlockBasedTable::UpdateCacheHitMetrics(BlockType block_type,
286
215
 
287
216
  void BlockBasedTable::UpdateCacheMissMetrics(BlockType block_type,
288
217
  GetContext* get_context) const {
289
- Statistics* const statistics = rep_->ioptions.statistics;
218
+ Statistics* const statistics = rep_->ioptions.stats;
290
219
 
291
220
  // TODO: introduce aggregate (not per-level) block cache miss count
292
221
  PERF_COUNTER_BY_LEVEL_ADD(block_cache_miss_count, 1,
@@ -336,12 +265,9 @@ void BlockBasedTable::UpdateCacheMissMetrics(BlockType block_type,
336
265
  }
337
266
  }
338
267
 
339
- void BlockBasedTable::UpdateCacheInsertionMetrics(BlockType block_type,
340
- GetContext* get_context,
341
- size_t usage,
342
- bool redundant) const {
343
- Statistics* const statistics = rep_->ioptions.statistics;
344
-
268
+ void BlockBasedTable::UpdateCacheInsertionMetrics(
269
+ BlockType block_type, GetContext* get_context, size_t usage, bool redundant,
270
+ Statistics* const statistics) {
345
271
  // TODO: introduce perf counters for block cache insertions
346
272
  if (get_context) {
347
273
  ++get_context->get_context_stats_.num_cache_add;
@@ -430,9 +356,17 @@ void BlockBasedTable::UpdateCacheInsertionMetrics(BlockType block_type,
430
356
  }
431
357
 
432
358
  Cache::Handle* BlockBasedTable::GetEntryFromCache(
433
- Cache* block_cache, const Slice& key, BlockType block_type,
434
- GetContext* get_context) const {
435
- auto cache_handle = block_cache->Lookup(key, rep_->ioptions.statistics);
359
+ const CacheTier& cache_tier, Cache* block_cache, const Slice& key,
360
+ BlockType block_type, const bool wait, GetContext* get_context,
361
+ const Cache::CacheItemHelper* cache_helper,
362
+ const Cache::CreateCallback& create_cb, Cache::Priority priority) const {
363
+ Cache::Handle* cache_handle = nullptr;
364
+ if (cache_tier == CacheTier::kNonVolatileBlockTier) {
365
+ cache_handle = block_cache->Lookup(key, cache_helper, create_cb, priority,
366
+ wait, rep_->ioptions.statistics.get());
367
+ } else {
368
+ cache_handle = block_cache->Lookup(key, rep_->ioptions.statistics.get());
369
+ }
436
370
 
437
371
  if (cache_handle != nullptr) {
438
372
  UpdateCacheHitMetrics(block_type, get_context,
@@ -444,28 +378,21 @@ Cache::Handle* BlockBasedTable::GetEntryFromCache(
444
378
  return cache_handle;
445
379
  }
446
380
 
447
- // Helper function to setup the cache key's prefix for the Table.
448
- void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep) {
449
- assert(kMaxCacheKeyPrefixSize >= 10);
450
- rep->cache_key_prefix_size = 0;
451
- rep->compressed_cache_key_prefix_size = 0;
452
- if (rep->table_options.block_cache != nullptr) {
453
- GenerateCachePrefix<Cache, FSRandomAccessFile>(
454
- rep->table_options.block_cache.get(), rep->file->file(),
455
- &rep->cache_key_prefix[0], &rep->cache_key_prefix_size);
456
- }
457
- if (rep->table_options.persistent_cache != nullptr) {
458
- GenerateCachePrefix<PersistentCache, FSRandomAccessFile>(
459
- rep->table_options.persistent_cache.get(), rep->file->file(),
460
- &rep->persistent_cache_key_prefix[0],
461
- &rep->persistent_cache_key_prefix_size);
462
- }
463
- if (rep->table_options.block_cache_compressed != nullptr) {
464
- GenerateCachePrefix<Cache, FSRandomAccessFile>(
465
- rep->table_options.block_cache_compressed.get(), rep->file->file(),
466
- &rep->compressed_cache_key_prefix[0],
467
- &rep->compressed_cache_key_prefix_size);
381
+ template <typename TBlocklike>
382
+ Status BlockBasedTable::InsertEntryToCache(
383
+ const CacheTier& cache_tier, Cache* block_cache, const Slice& key,
384
+ const Cache::CacheItemHelper* cache_helper,
385
+ std::unique_ptr<TBlocklike>& block_holder, size_t charge,
386
+ Cache::Handle** cache_handle, Cache::Priority priority) const {
387
+ Status s = Status::OK();
388
+ if (cache_tier == CacheTier::kNonVolatileBlockTier) {
389
+ s = block_cache->Insert(key, block_holder.get(), cache_helper, charge,
390
+ cache_handle, priority);
391
+ } else {
392
+ s = block_cache->Insert(key, block_holder.get(), charge,
393
+ cache_helper->del_cb, cache_handle, priority);
468
394
  }
395
+ return s;
469
396
  }
470
397
 
471
398
  namespace {
@@ -562,31 +489,79 @@ Status GetGlobalSequenceNumber(const TableProperties& table_properties,
562
489
  }
563
490
  } // namespace
564
491
 
565
- Slice BlockBasedTable::GetCacheKey(const char* cache_key_prefix,
566
- size_t cache_key_prefix_size,
567
- const BlockHandle& handle, char* cache_key) {
568
- assert(cache_key != nullptr);
569
- assert(cache_key_prefix_size != 0);
570
- assert(cache_key_prefix_size <= kMaxCacheKeyPrefixSize);
571
- memcpy(cache_key, cache_key_prefix, cache_key_prefix_size);
572
- char* end =
573
- EncodeVarint64(cache_key + cache_key_prefix_size, handle.offset());
574
- return Slice(cache_key, static_cast<size_t>(end - cache_key));
492
+ void BlockBasedTable::SetupBaseCacheKey(const TableProperties* properties,
493
+ const std::string& cur_db_session_id,
494
+ uint64_t cur_file_number,
495
+ uint64_t file_size,
496
+ OffsetableCacheKey* out_base_cache_key,
497
+ bool* out_is_stable) {
498
+ // Use a stable cache key if sufficient data is in table properties
499
+ std::string db_session_id;
500
+ uint64_t file_num;
501
+ std::string db_id;
502
+ if (properties && !properties->db_session_id.empty() &&
503
+ properties->orig_file_number > 0) {
504
+ // (Newer SST file case)
505
+ // We must have both properties to get a stable unique id because
506
+ // CreateColumnFamilyWithImport or IngestExternalFiles can change the
507
+ // file numbers on a file.
508
+ db_session_id = properties->db_session_id;
509
+ file_num = properties->orig_file_number;
510
+ // Less critical, populated in earlier release than above
511
+ db_id = properties->db_id;
512
+ if (out_is_stable) {
513
+ *out_is_stable = true;
514
+ }
515
+ } else {
516
+ // (Old SST file case)
517
+ // We use (unique) cache keys based on current identifiers. These are at
518
+ // least stable across table file close and re-open, but not across
519
+ // different DBs nor DB close and re-open.
520
+ db_session_id = cur_db_session_id;
521
+ file_num = cur_file_number;
522
+ // Plumbing through the DB ID to here would be annoying, and of limited
523
+ // value because of the case of VersionSet::Recover opening some table
524
+ // files and later setting the DB ID. So we just rely on uniqueness
525
+ // level provided by session ID.
526
+ db_id = "unknown";
527
+ if (out_is_stable) {
528
+ *out_is_stable = false;
529
+ }
530
+ }
531
+
532
+ // Too many tests to update to get these working
533
+ // assert(file_num > 0);
534
+ // assert(!db_session_id.empty());
535
+ // assert(!db_id.empty());
536
+
537
+ // Minimum block size is 5 bytes; therefore we can trim off two lower bits
538
+ // from offsets. See GetCacheKey.
539
+ *out_base_cache_key = OffsetableCacheKey(db_id, db_session_id, file_num,
540
+ /*max_offset*/ file_size >> 2);
541
+ }
542
+
543
+ CacheKey BlockBasedTable::GetCacheKey(const OffsetableCacheKey& base_cache_key,
544
+ const BlockHandle& handle) {
545
+ // Minimum block size is 5 bytes; therefore we can trim off two lower bits
546
+ // from offset.
547
+ return base_cache_key.WithOffset(handle.offset() >> 2);
575
548
  }
576
549
 
577
550
  Status BlockBasedTable::Open(
578
- const ReadOptions& read_options, const ImmutableCFOptions& ioptions,
551
+ const ReadOptions& read_options, const ImmutableOptions& ioptions,
579
552
  const EnvOptions& env_options, const BlockBasedTableOptions& table_options,
580
553
  const InternalKeyComparator& internal_comparator,
581
554
  std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
582
555
  std::unique_ptr<TableReader>* table_reader,
583
- const SliceTransform* prefix_extractor,
556
+ std::shared_ptr<CacheReservationManager> table_reader_cache_res_mgr,
557
+ const std::shared_ptr<const SliceTransform>& prefix_extractor,
584
558
  const bool prefetch_index_and_filter_in_cache, const bool skip_filters,
585
559
  const int level, const bool immortal_table,
586
560
  const SequenceNumber largest_seqno, const bool force_direct_prefetch,
587
561
  TailPrefetchStats* tail_prefetch_stats,
588
562
  BlockCacheTracer* const block_cache_tracer,
589
- size_t max_file_size_for_l0_meta_pin) {
563
+ size_t max_file_size_for_l0_meta_pin, const std::string& cur_db_session_id,
564
+ uint64_t cur_file_num) {
590
565
  table_reader->reset();
591
566
 
592
567
  Status s;
@@ -617,7 +592,8 @@ Status BlockBasedTable::Open(
617
592
  } else {
618
593
  // Should not prefetch for mmap mode.
619
594
  prefetch_buffer.reset(new FilePrefetchBuffer(
620
- nullptr, 0, 0, false /* enable */, true /* track_min_offset */));
595
+ 0 /* readahead_size */, 0 /* max_readahead_size */, false /* enable */,
596
+ true /* track_min_offset */));
621
597
  }
622
598
 
623
599
  // Read in the following order:
@@ -629,7 +605,7 @@ Status BlockBasedTable::Open(
629
605
  // 6. [meta block: index]
630
606
  // 7. [meta block: filter]
631
607
  IOOptions opts;
632
- s = PrepareIOFromReadOptions(ro, file->env(), opts);
608
+ s = file->PrepareIOOptions(ro, opts);
633
609
  if (s.ok()) {
634
610
  s = ReadFooterFromFile(opts, file.get(), prefetch_buffer.get(), file_size,
635
611
  &footer, kBlockBasedTableMagicNumber);
@@ -637,39 +613,34 @@ Status BlockBasedTable::Open(
637
613
  if (!s.ok()) {
638
614
  return s;
639
615
  }
640
- if (!BlockBasedTableSupportedVersion(footer.version())) {
616
+ if (!IsSupportedFormatVersion(footer.format_version())) {
641
617
  return Status::Corruption(
642
618
  "Unknown Footer version. Maybe this file was created with newer "
643
619
  "version of RocksDB?");
644
620
  }
645
621
 
646
- // We've successfully read the footer. We are ready to serve requests.
647
- // Better not mutate rep_ after the creation. eg. internal_prefix_transform
648
- // raw pointer will be used to create HashIndexReader, whose reset may
649
- // access a dangling pointer.
650
622
  BlockCacheLookupContext lookup_context{TableReaderCaller::kPrefetch};
651
623
  Rep* rep = new BlockBasedTable::Rep(ioptions, env_options, table_options,
652
624
  internal_comparator, skip_filters,
653
625
  file_size, level, immortal_table);
654
626
  rep->file = std::move(file);
655
627
  rep->footer = footer;
656
- rep->hash_index_allow_collision = table_options.hash_index_allow_collision;
628
+ // We've successfully read the footer. We are ready to serve requests.
629
+ // Better not mutate rep_ after the creation. eg. internal_prefix_transform
630
+ // raw pointer will be used to create HashIndexReader, whose reset may
631
+ // access a dangling pointer.
657
632
  // We need to wrap data with internal_prefix_transform to make sure it can
658
633
  // handle prefix correctly.
634
+ // FIXME: is changed prefix_extractor handled anywhere for hash index?
659
635
  if (prefix_extractor != nullptr) {
660
636
  rep->internal_prefix_transform.reset(
661
- new InternalKeySliceTransform(prefix_extractor));
637
+ new InternalKeySliceTransform(prefix_extractor.get()));
662
638
  }
663
- SetupCacheKeyPrefix(rep);
664
- std::unique_ptr<BlockBasedTable> new_table(
665
- new BlockBasedTable(rep, block_cache_tracer));
666
639
 
667
- // page cache options
668
- rep->persistent_cache_options =
669
- PersistentCacheOptions(rep->table_options.persistent_cache,
670
- std::string(rep->persistent_cache_key_prefix,
671
- rep->persistent_cache_key_prefix_size),
672
- rep->ioptions.statistics);
640
+ // For fully portable/stable cache keys, we need to read the properties
641
+ // block before setting up cache keys. TODO: consider setting up a bootstrap
642
+ // cache key for PersistentCache to use for metaindex and properties blocks.
643
+ rep->persistent_cache_options = PersistentCacheOptions();
673
644
 
674
645
  // Meta-blocks are not dictionary compressed. Explicitly set the dictionary
675
646
  // handle to null, otherwise it may be seen as uninitialized during the below
@@ -677,6 +648,8 @@ Status BlockBasedTable::Open(
677
648
  rep->compression_dict_handle = BlockHandle::NullBlockHandle();
678
649
 
679
650
  // Read metaindex
651
+ std::unique_ptr<BlockBasedTable> new_table(
652
+ new BlockBasedTable(rep, block_cache_tracer));
680
653
  std::unique_ptr<Block> metaindex;
681
654
  std::unique_ptr<InternalIterator> metaindex_iter;
682
655
  s = new_table->ReadMetaIndexBlock(ro, prefetch_buffer.get(), &metaindex,
@@ -692,6 +665,39 @@ Status BlockBasedTable::Open(
692
665
  if (!s.ok()) {
693
666
  return s;
694
667
  }
668
+ if (!PrefixExtractorChangedHelper(rep->table_properties.get(),
669
+ prefix_extractor.get())) {
670
+ // Establish fast path for unchanged prefix_extractor
671
+ rep->table_prefix_extractor = prefix_extractor;
672
+ } else {
673
+ // Current prefix_extractor doesn't match table
674
+ #ifndef ROCKSDB_LITE
675
+ if (rep->table_properties) {
676
+ //**TODO: If/When the DBOptions has a registry in it, the ConfigOptions
677
+ // will need to use it
678
+ ConfigOptions config_options;
679
+ Status st = SliceTransform::CreateFromString(
680
+ config_options, rep->table_properties->prefix_extractor_name,
681
+ &(rep->table_prefix_extractor));
682
+ if (!st.ok()) {
683
+ //**TODO: Should this error be returned or swallowed?
684
+ ROCKS_LOG_ERROR(rep->ioptions.logger,
685
+ "Failed to create prefix extractor[%s]: %s",
686
+ rep->table_properties->prefix_extractor_name.c_str(),
687
+ st.ToString().c_str());
688
+ }
689
+ }
690
+ #endif // ROCKSDB_LITE
691
+ }
692
+
693
+ // With properties loaded, we can set up portable/stable cache keys
694
+ SetupBaseCacheKey(rep->table_properties.get(), cur_db_session_id,
695
+ cur_file_num, file_size, &rep->base_cache_key);
696
+
697
+ rep->persistent_cache_options =
698
+ PersistentCacheOptions(rep->table_options.persistent_cache,
699
+ rep->base_cache_key, rep->ioptions.stats);
700
+
695
701
  s = new_table->ReadRangeDelBlock(ro, prefetch_buffer.get(),
696
702
  metaindex_iter.get(), internal_comparator,
697
703
  &lookup_context);
@@ -711,10 +717,22 @@ Status BlockBasedTable::Open(
711
717
  tail_prefetch_stats->RecordEffectiveSize(
712
718
  static_cast<size_t>(file_size) - prefetch_buffer->min_offset_read());
713
719
  }
720
+ }
714
721
 
715
- *table_reader = std::move(new_table);
722
+ if (s.ok() && table_reader_cache_res_mgr) {
723
+ std::size_t mem_usage = new_table->ApproximateMemoryUsage();
724
+ s = table_reader_cache_res_mgr->MakeCacheReservation(
725
+ mem_usage, &(rep->table_reader_cache_res_handle));
726
+ if (s.IsIncomplete()) {
727
+ s = Status::MemoryLimit(
728
+ "Can't allocate BlockBasedTableReader due to memory limit based on "
729
+ "cache capacity for memory allocation");
730
+ }
716
731
  }
717
732
 
733
+ if (s.ok()) {
734
+ *table_reader = std::move(new_table);
735
+ }
718
736
  return s;
719
737
  }
720
738
 
@@ -753,53 +771,23 @@ Status BlockBasedTable::PrefetchTail(
753
771
  // Try file system prefetch
754
772
  if (!file->use_direct_io() && !force_direct_prefetch) {
755
773
  if (!file->Prefetch(prefetch_off, prefetch_len).IsNotSupported()) {
756
- prefetch_buffer->reset(
757
- new FilePrefetchBuffer(nullptr, 0, 0, false, true));
774
+ prefetch_buffer->reset(new FilePrefetchBuffer(
775
+ 0 /* readahead_size */, 0 /* max_readahead_size */,
776
+ false /* enable */, true /* track_min_offset */));
758
777
  return Status::OK();
759
778
  }
760
779
  }
761
780
 
762
781
  // Use `FilePrefetchBuffer`
763
- prefetch_buffer->reset(new FilePrefetchBuffer(nullptr, 0, 0, true, true));
782
+ prefetch_buffer->reset(
783
+ new FilePrefetchBuffer(0 /* readahead_size */, 0 /* max_readahead_size */,
784
+ true /* enable */, true /* track_min_offset */));
764
785
  IOOptions opts;
765
- Status s = PrepareIOFromReadOptions(ro, file->env(), opts);
786
+ Status s = file->PrepareIOOptions(ro, opts);
766
787
  if (s.ok()) {
767
- s = (*prefetch_buffer)->Prefetch(opts, file, prefetch_off, prefetch_len);
768
- }
769
- return s;
770
- }
771
-
772
- Status BlockBasedTable::TryReadPropertiesWithGlobalSeqno(
773
- const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
774
- const Slice& handle_value, TableProperties** table_properties) {
775
- assert(table_properties != nullptr);
776
- // If this is an external SST file ingested with write_global_seqno set to
777
- // true, then we expect the checksum mismatch because checksum was written
778
- // by SstFileWriter, but its global seqno in the properties block may have
779
- // been changed during ingestion. In this case, we read the properties
780
- // block, copy it to a memory buffer, change the global seqno to its
781
- // original value, i.e. 0, and verify the checksum again.
782
- BlockHandle props_block_handle;
783
- CacheAllocationPtr tmp_buf;
784
- Status s = ReadProperties(ro, handle_value, rep_->file.get(), prefetch_buffer,
785
- rep_->footer, rep_->ioptions, table_properties,
786
- false /* verify_checksum */, &props_block_handle,
787
- &tmp_buf, false /* compression_type_missing */,
788
- nullptr /* memory_allocator */);
789
- if (s.ok() && tmp_buf) {
790
- const auto seqno_pos_iter =
791
- (*table_properties)
792
- ->properties_offsets.find(
793
- ExternalSstFilePropertyNames::kGlobalSeqno);
794
- size_t block_size = static_cast<size_t>(props_block_handle.size());
795
- if (seqno_pos_iter != (*table_properties)->properties_offsets.end()) {
796
- uint64_t global_seqno_offset = seqno_pos_iter->second;
797
- EncodeFixed64(
798
- tmp_buf.get() + global_seqno_offset - props_block_handle.offset(), 0);
799
- }
800
- s = ROCKSDB_NAMESPACE::VerifyBlockChecksum(
801
- rep_->footer.checksum(), tmp_buf.get(), block_size,
802
- rep_->file->file_name(), props_block_handle.offset());
788
+ s = (*prefetch_buffer)
789
+ ->Prefetch(opts, file, prefetch_off, prefetch_len,
790
+ ro.rate_limiter_priority);
803
791
  }
804
792
  return s;
805
793
  }
@@ -807,45 +795,32 @@ Status BlockBasedTable::TryReadPropertiesWithGlobalSeqno(
807
795
  Status BlockBasedTable::ReadPropertiesBlock(
808
796
  const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
809
797
  InternalIterator* meta_iter, const SequenceNumber largest_seqno) {
810
- bool found_properties_block = true;
811
798
  Status s;
812
- s = SeekToPropertiesBlock(meta_iter, &found_properties_block);
799
+ BlockHandle handle;
800
+ s = FindOptionalMetaBlock(meta_iter, kPropertiesBlockName, &handle);
813
801
 
814
802
  if (!s.ok()) {
815
- ROCKS_LOG_WARN(rep_->ioptions.info_log,
803
+ ROCKS_LOG_WARN(rep_->ioptions.logger,
816
804
  "Error when seeking to properties block from file: %s",
817
805
  s.ToString().c_str());
818
- } else if (found_properties_block) {
806
+ } else if (!handle.IsNull()) {
819
807
  s = meta_iter->status();
820
- TableProperties* table_properties = nullptr;
808
+ std::unique_ptr<TableProperties> table_properties;
821
809
  if (s.ok()) {
822
- s = ReadProperties(
823
- ro, meta_iter->value(), rep_->file.get(), prefetch_buffer,
824
- rep_->footer, rep_->ioptions, &table_properties,
825
- true /* verify_checksum */, nullptr /* ret_block_handle */,
826
- nullptr /* ret_block_contents */,
827
- false /* compression_type_missing */, nullptr /* memory_allocator */);
810
+ s = ReadTablePropertiesHelper(
811
+ ro, handle, rep_->file.get(), prefetch_buffer, rep_->footer,
812
+ rep_->ioptions, &table_properties, nullptr /* memory_allocator */);
828
813
  }
829
814
  IGNORE_STATUS_IF_ERROR(s);
830
815
 
831
- if (s.IsCorruption()) {
832
- s = TryReadPropertiesWithGlobalSeqno(
833
- ro, prefetch_buffer, meta_iter->value(), &table_properties);
834
- IGNORE_STATUS_IF_ERROR(s);
835
- }
836
- std::unique_ptr<TableProperties> props_guard;
837
- if (table_properties != nullptr) {
838
- props_guard.reset(table_properties);
839
- }
840
-
841
816
  if (!s.ok()) {
842
- ROCKS_LOG_WARN(rep_->ioptions.info_log,
817
+ ROCKS_LOG_WARN(rep_->ioptions.logger,
843
818
  "Encountered error while reading data from properties "
844
819
  "block %s",
845
820
  s.ToString().c_str());
846
821
  } else {
847
822
  assert(table_properties != nullptr);
848
- rep_->table_properties.reset(props_guard.release());
823
+ rep_->table_properties = std::move(table_properties);
849
824
  rep_->blocks_maybe_compressed =
850
825
  rep_->table_properties->compression_name !=
851
826
  CompressionTypeToString(kNoCompression);
@@ -856,26 +831,19 @@ Status BlockBasedTable::ReadPropertiesBlock(
856
831
  CompressionTypeToString(kZSTDNotFinalCompression));
857
832
  }
858
833
  } else {
859
- ROCKS_LOG_ERROR(rep_->ioptions.info_log,
834
+ ROCKS_LOG_ERROR(rep_->ioptions.logger,
860
835
  "Cannot find Properties block from file.");
861
836
  }
862
- #ifndef ROCKSDB_LITE
863
- if (rep_->table_properties) {
864
- ParseSliceTransform(rep_->table_properties->prefix_extractor_name,
865
- &(rep_->table_prefix_extractor));
866
- }
867
- #endif // ROCKSDB_LITE
868
837
 
869
838
  // Read the table properties, if provided.
870
839
  if (rep_->table_properties) {
871
840
  rep_->whole_key_filtering &=
872
841
  IsFeatureSupported(*(rep_->table_properties),
873
842
  BlockBasedTablePropertyNames::kWholeKeyFiltering,
874
- rep_->ioptions.info_log);
875
- rep_->prefix_filtering &=
876
- IsFeatureSupported(*(rep_->table_properties),
877
- BlockBasedTablePropertyNames::kPrefixFiltering,
878
- rep_->ioptions.info_log);
843
+ rep_->ioptions.logger);
844
+ rep_->prefix_filtering &= IsFeatureSupported(
845
+ *(rep_->table_properties),
846
+ BlockBasedTablePropertyNames::kPrefixFiltering, rep_->ioptions.logger);
879
847
 
880
848
  rep_->index_key_includes_seq =
881
849
  rep_->table_properties->index_key_is_user_key == 0;
@@ -898,7 +866,7 @@ Status BlockBasedTable::ReadPropertiesBlock(
898
866
  s = GetGlobalSequenceNumber(*(rep_->table_properties), largest_seqno,
899
867
  &(rep_->global_seqno));
900
868
  if (!s.ok()) {
901
- ROCKS_LOG_ERROR(rep_->ioptions.info_log, "%s", s.ToString().c_str());
869
+ ROCKS_LOG_ERROR(rep_->ioptions.logger, "%s", s.ToString().c_str());
902
870
  }
903
871
  }
904
872
  return s;
@@ -910,15 +878,14 @@ Status BlockBasedTable::ReadRangeDelBlock(
910
878
  const InternalKeyComparator& internal_comparator,
911
879
  BlockCacheLookupContext* lookup_context) {
912
880
  Status s;
913
- bool found_range_del_block;
914
881
  BlockHandle range_del_handle;
915
- s = SeekToRangeDelBlock(meta_iter, &found_range_del_block, &range_del_handle);
882
+ s = FindOptionalMetaBlock(meta_iter, kRangeDelBlockName, &range_del_handle);
916
883
  if (!s.ok()) {
917
884
  ROCKS_LOG_WARN(
918
- rep_->ioptions.info_log,
885
+ rep_->ioptions.logger,
919
886
  "Error when seeking to range delete tombstones block from file: %s",
920
887
  s.ToString().c_str());
921
- } else if (found_range_del_block && !range_del_handle.IsNull()) {
888
+ } else if (!range_del_handle.IsNull()) {
922
889
  std::unique_ptr<InternalIterator> iter(NewDataBlockIterator<DataBlockIter>(
923
890
  read_options, range_del_handle,
924
891
  /*input_iter=*/nullptr, BlockType::kRangeDeletion,
@@ -927,7 +894,7 @@ Status BlockBasedTable::ReadRangeDelBlock(
927
894
  s = iter->status();
928
895
  if (!s.ok()) {
929
896
  ROCKS_LOG_WARN(
930
- rep_->ioptions.info_log,
897
+ rep_->ioptions.logger,
931
898
  "Encountered error while reading data from range del block %s",
932
899
  s.ToString().c_str());
933
900
  IGNORE_STATUS_IF_ERROR(s);
@@ -946,33 +913,59 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
946
913
  const BlockBasedTableOptions& table_options, const int level,
947
914
  size_t file_size, size_t max_file_size_for_l0_meta_pin,
948
915
  BlockCacheLookupContext* lookup_context) {
949
- Status s;
950
-
951
916
  // Find filter handle and filter type
952
917
  if (rep_->filter_policy) {
953
- for (auto filter_type :
954
- {Rep::FilterType::kFullFilter, Rep::FilterType::kPartitionedFilter,
955
- Rep::FilterType::kBlockFilter}) {
956
- std::string prefix;
957
- switch (filter_type) {
958
- case Rep::FilterType::kFullFilter:
959
- prefix = kFullFilterBlockPrefix;
960
- break;
961
- case Rep::FilterType::kPartitionedFilter:
962
- prefix = kPartitionedFilterBlockPrefix;
963
- break;
964
- case Rep::FilterType::kBlockFilter:
965
- prefix = kFilterBlockPrefix;
918
+ auto name = rep_->filter_policy->CompatibilityName();
919
+ bool builtin_compatible =
920
+ strcmp(name, BuiltinFilterPolicy::kCompatibilityName()) == 0;
921
+
922
+ for (const auto& [filter_type, prefix] :
923
+ {std::make_pair(Rep::FilterType::kFullFilter, kFullFilterBlockPrefix),
924
+ std::make_pair(Rep::FilterType::kPartitionedFilter,
925
+ kPartitionedFilterBlockPrefix),
926
+ std::make_pair(Rep::FilterType::kBlockFilter, kFilterBlockPrefix)}) {
927
+ if (builtin_compatible) {
928
+ // This code is only here to deal with a hiccup in early 7.0.x where
929
+ // there was an unintentional name change in the SST files metadata.
930
+ // It should be OK to remove this in the future (late 2022) and just
931
+ // have the 'else' code.
932
+ // NOTE: the test:: names below are likely not needed but included
933
+ // out of caution
934
+ static const std::unordered_set<std::string> kBuiltinNameAndAliases = {
935
+ BuiltinFilterPolicy::kCompatibilityName(),
936
+ test::LegacyBloomFilterPolicy::kClassName(),
937
+ test::FastLocalBloomFilterPolicy::kClassName(),
938
+ test::Standard128RibbonFilterPolicy::kClassName(),
939
+ DeprecatedBlockBasedBloomFilterPolicy::kClassName(),
940
+ BloomFilterPolicy::kClassName(),
941
+ RibbonFilterPolicy::kClassName(),
942
+ };
943
+
944
+ // For efficiency, do a prefix seek and see if the first match is
945
+ // good.
946
+ meta_iter->Seek(prefix);
947
+ if (meta_iter->status().ok() && meta_iter->Valid()) {
948
+ Slice key = meta_iter->key();
949
+ if (key.starts_with(prefix)) {
950
+ key.remove_prefix(prefix.size());
951
+ if (kBuiltinNameAndAliases.find(key.ToString()) !=
952
+ kBuiltinNameAndAliases.end()) {
953
+ Slice v = meta_iter->value();
954
+ Status s = rep_->filter_handle.DecodeFrom(&v);
955
+ if (s.ok()) {
956
+ rep_->filter_type = filter_type;
957
+ break;
958
+ }
959
+ }
960
+ }
961
+ }
962
+ } else {
963
+ std::string filter_block_key = prefix + name;
964
+ if (FindMetaBlock(meta_iter, filter_block_key, &rep_->filter_handle)
965
+ .ok()) {
966
+ rep_->filter_type = filter_type;
966
967
  break;
967
- default:
968
- assert(0);
969
- }
970
- std::string filter_block_key = prefix;
971
- filter_block_key.append(rep_->filter_policy->Name());
972
- if (FindMetaBlock(meta_iter, filter_block_key, &rep_->filter_handle)
973
- .ok()) {
974
- rep_->filter_type = filter_type;
975
- break;
968
+ }
976
969
  }
977
970
  }
978
971
  }
@@ -981,9 +974,8 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
981
974
  rep_->index_type == BlockBasedTableOptions::kTwoLevelIndexSearch);
982
975
 
983
976
  // Find compression dictionary handle
984
- bool found_compression_dict = false;
985
- s = SeekToCompressionDictBlock(meta_iter, &found_compression_dict,
986
- &rep_->compression_dict_handle);
977
+ Status s = FindOptionalMetaBlock(meta_iter, kCompressionDictBlockName,
978
+ &rep_->compression_dict_handle);
987
979
  if (!s.ok()) {
988
980
  return s;
989
981
  }
@@ -1038,6 +1030,8 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
1038
1030
  ? pin_top_level_index
1039
1031
  : pin_unpartitioned;
1040
1032
  // prefetch the first level of index
1033
+ // WART: this might be redundant (unnecessary cache hit) if !pin_index,
1034
+ // depending on prepopulate_block_cache option
1041
1035
  const bool prefetch_index = prefetch_all || pin_index;
1042
1036
 
1043
1037
  std::unique_ptr<IndexReader> index_reader;
@@ -1066,6 +1060,8 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
1066
1060
  ? pin_top_level_index
1067
1061
  : pin_unpartitioned;
1068
1062
  // prefetch the first level of filter
1063
+ // WART: this might be redundant (unnecessary cache hit) if !pin_filter,
1064
+ // depending on prepopulate_block_cache option
1069
1065
  const bool prefetch_filter = prefetch_all || pin_filter;
1070
1066
 
1071
1067
  if (rep_->filter_policy) {
@@ -1126,6 +1122,11 @@ std::shared_ptr<const TableProperties> BlockBasedTable::GetTableProperties()
1126
1122
 
1127
1123
  size_t BlockBasedTable::ApproximateMemoryUsage() const {
1128
1124
  size_t usage = 0;
1125
+ if (rep_) {
1126
+ usage += rep_->ApproximateMemoryUsage();
1127
+ } else {
1128
+ return usage;
1129
+ }
1129
1130
  if (rep_->filter) {
1130
1131
  usage += rep_->filter->ApproximateMemoryUsage();
1131
1132
  }
@@ -1135,6 +1136,9 @@ size_t BlockBasedTable::ApproximateMemoryUsage() const {
1135
1136
  if (rep_->uncompression_dict_reader) {
1136
1137
  usage += rep_->uncompression_dict_reader->ApproximateMemoryUsage();
1137
1138
  }
1139
+ if (rep_->table_properties) {
1140
+ usage += rep_->table_properties->ApproximateMemoryUsage();
1141
+ }
1138
1142
  return usage;
1139
1143
  }
1140
1144
 
@@ -1158,7 +1162,7 @@ Status BlockBasedTable::ReadMetaIndexBlock(
1158
1162
  nullptr /* filter_policy */);
1159
1163
 
1160
1164
  if (!s.ok()) {
1161
- ROCKS_LOG_ERROR(rep_->ioptions.info_log,
1165
+ ROCKS_LOG_ERROR(rep_->ioptions.logger,
1162
1166
  "Encountered error while reading data from properties"
1163
1167
  " block %s",
1164
1168
  s.ToString().c_str());
@@ -1167,33 +1171,48 @@ Status BlockBasedTable::ReadMetaIndexBlock(
1167
1171
 
1168
1172
  *metaindex_block = std::move(metaindex);
1169
1173
  // meta block uses bytewise comparator.
1170
- iter->reset(metaindex_block->get()->NewDataIterator(
1171
- BytewiseComparator(), kDisableGlobalSequenceNumber));
1174
+ iter->reset(metaindex_block->get()->NewMetaIterator());
1172
1175
  return Status::OK();
1173
1176
  }
1174
1177
 
1175
1178
  template <typename TBlocklike>
1176
1179
  Status BlockBasedTable::GetDataBlockFromCache(
1177
- const Slice& block_cache_key, const Slice& compressed_block_cache_key,
1178
- Cache* block_cache, Cache* block_cache_compressed,
1180
+ const Slice& cache_key, Cache* block_cache, Cache* block_cache_compressed,
1179
1181
  const ReadOptions& read_options, CachableEntry<TBlocklike>* block,
1180
1182
  const UncompressionDict& uncompression_dict, BlockType block_type,
1181
- GetContext* get_context) const {
1183
+ const bool wait, GetContext* get_context) const {
1182
1184
  const size_t read_amp_bytes_per_bit =
1183
1185
  block_type == BlockType::kData
1184
1186
  ? rep_->table_options.read_amp_bytes_per_bit
1185
1187
  : 0;
1186
1188
  assert(block);
1187
1189
  assert(block->IsEmpty());
1190
+ const Cache::Priority priority =
1191
+ rep_->table_options.cache_index_and_filter_blocks_with_high_priority &&
1192
+ (block_type == BlockType::kFilter ||
1193
+ block_type == BlockType::kCompressionDictionary ||
1194
+ block_type == BlockType::kIndex)
1195
+ ? Cache::Priority::HIGH
1196
+ : Cache::Priority::LOW;
1188
1197
 
1189
1198
  Status s;
1190
1199
  BlockContents* compressed_block = nullptr;
1191
1200
  Cache::Handle* block_cache_compressed_handle = nullptr;
1201
+ Statistics* statistics = rep_->ioptions.statistics.get();
1202
+ bool using_zstd = rep_->blocks_definitely_zstd_compressed;
1203
+ const FilterPolicy* filter_policy = rep_->filter_policy;
1204
+ Cache::CreateCallback create_cb = GetCreateCallback<TBlocklike>(
1205
+ read_amp_bytes_per_bit, statistics, using_zstd, filter_policy);
1192
1206
 
1193
1207
  // Lookup uncompressed cache first
1194
1208
  if (block_cache != nullptr) {
1195
- auto cache_handle = GetEntryFromCache(block_cache, block_cache_key,
1196
- block_type, get_context);
1209
+ assert(!cache_key.empty());
1210
+ Cache::Handle* cache_handle = nullptr;
1211
+ cache_handle = GetEntryFromCache(
1212
+ rep_->ioptions.lowest_used_cache_tier, block_cache, cache_key,
1213
+ block_type, wait, get_context,
1214
+ BlocklikeTraits<TBlocklike>::GetCacheItemHelper(block_type), create_cb,
1215
+ priority);
1197
1216
  if (cache_handle != nullptr) {
1198
1217
  block->SetCachedValue(
1199
1218
  reinterpret_cast<TBlocklike*>(block_cache->Value(cache_handle)),
@@ -1209,11 +1228,20 @@ Status BlockBasedTable::GetDataBlockFromCache(
1209
1228
  return s;
1210
1229
  }
1211
1230
 
1212
- assert(!compressed_block_cache_key.empty());
1213
- block_cache_compressed_handle =
1214
- block_cache_compressed->Lookup(compressed_block_cache_key);
1215
-
1216
- Statistics* statistics = rep_->ioptions.statistics;
1231
+ assert(!cache_key.empty());
1232
+ BlockContents contents;
1233
+ if (rep_->ioptions.lowest_used_cache_tier ==
1234
+ CacheTier::kNonVolatileBlockTier) {
1235
+ Cache::CreateCallback create_cb_special = GetCreateCallback<BlockContents>(
1236
+ read_amp_bytes_per_bit, statistics, using_zstd, filter_policy);
1237
+ block_cache_compressed_handle = block_cache_compressed->Lookup(
1238
+ cache_key,
1239
+ BlocklikeTraits<BlockContents>::GetCacheItemHelper(block_type),
1240
+ create_cb_special, priority, true);
1241
+ } else {
1242
+ block_cache_compressed_handle =
1243
+ block_cache_compressed->Lookup(cache_key, statistics);
1244
+ }
1217
1245
 
1218
1246
  // if we found in the compressed cache, then uncompress and insert into
1219
1247
  // uncompressed cache
@@ -1226,11 +1254,10 @@ Status BlockBasedTable::GetDataBlockFromCache(
1226
1254
  RecordTick(statistics, BLOCK_CACHE_COMPRESSED_HIT);
1227
1255
  compressed_block = reinterpret_cast<BlockContents*>(
1228
1256
  block_cache_compressed->Value(block_cache_compressed_handle));
1229
- CompressionType compression_type = compressed_block->get_compression_type();
1257
+ CompressionType compression_type = GetBlockCompressionType(*compressed_block);
1230
1258
  assert(compression_type != kNoCompression);
1231
1259
 
1232
1260
  // Retrieve the uncompressed contents into a new buffer
1233
- BlockContents contents;
1234
1261
  UncompressionContext context(compression_type);
1235
1262
  UncompressionInfo info(context, uncompression_dict, compression_type);
1236
1263
  s = UncompressBlockContents(
@@ -1238,7 +1265,8 @@ Status BlockBasedTable::GetDataBlockFromCache(
1238
1265
  &contents, rep_->table_options.format_version, rep_->ioptions,
1239
1266
  GetMemoryAllocator(rep_->table_options));
1240
1267
 
1241
- // Insert uncompressed block into block cache
1268
+ // Insert uncompressed block into block cache, the priority is based on the
1269
+ // data block type.
1242
1270
  if (s.ok()) {
1243
1271
  std::unique_ptr<TBlocklike> block_holder(
1244
1272
  BlocklikeTraits<TBlocklike>::Create(
@@ -1250,15 +1278,17 @@ Status BlockBasedTable::GetDataBlockFromCache(
1250
1278
  read_options.fill_cache) {
1251
1279
  size_t charge = block_holder->ApproximateMemoryUsage();
1252
1280
  Cache::Handle* cache_handle = nullptr;
1253
- s = block_cache->Insert(block_cache_key, block_holder.get(), charge,
1254
- &DeleteCachedEntry<TBlocklike>, &cache_handle);
1281
+ s = InsertEntryToCache(
1282
+ rep_->ioptions.lowest_used_cache_tier, block_cache, cache_key,
1283
+ BlocklikeTraits<TBlocklike>::GetCacheItemHelper(block_type),
1284
+ block_holder, charge, &cache_handle, priority);
1255
1285
  if (s.ok()) {
1256
1286
  assert(cache_handle != nullptr);
1257
1287
  block->SetCachedValue(block_holder.release(), block_cache,
1258
1288
  cache_handle);
1259
1289
 
1260
1290
  UpdateCacheInsertionMetrics(block_type, get_context, charge,
1261
- s.IsOkOverwritten());
1291
+ s.IsOkOverwritten(), rep_->ioptions.stats);
1262
1292
  } else {
1263
1293
  RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES);
1264
1294
  }
@@ -1274,14 +1304,13 @@ Status BlockBasedTable::GetDataBlockFromCache(
1274
1304
 
1275
1305
  template <typename TBlocklike>
1276
1306
  Status BlockBasedTable::PutDataBlockToCache(
1277
- const Slice& block_cache_key, const Slice& compressed_block_cache_key,
1278
- Cache* block_cache, Cache* block_cache_compressed,
1307
+ const Slice& cache_key, Cache* block_cache, Cache* block_cache_compressed,
1279
1308
  CachableEntry<TBlocklike>* cached_block, BlockContents* raw_block_contents,
1280
1309
  CompressionType raw_block_comp_type,
1281
1310
  const UncompressionDict& uncompression_dict,
1282
1311
  MemoryAllocator* memory_allocator, BlockType block_type,
1283
1312
  GetContext* get_context) const {
1284
- const ImmutableCFOptions& ioptions = rep_->ioptions;
1313
+ const ImmutableOptions& ioptions = rep_->ioptions;
1285
1314
  const uint32_t format_version = rep_->table_options.format_version;
1286
1315
  const size_t read_amp_bytes_per_bit =
1287
1316
  block_type == BlockType::kData
@@ -1298,7 +1327,7 @@ Status BlockBasedTable::PutDataBlockToCache(
1298
1327
  assert(cached_block->IsEmpty());
1299
1328
 
1300
1329
  Status s;
1301
- Statistics* statistics = ioptions.statistics;
1330
+ Statistics* statistics = ioptions.stats;
1302
1331
 
1303
1332
  std::unique_ptr<TBlocklike> block_holder;
1304
1333
  if (raw_block_comp_type != kNoCompression) {
@@ -1330,24 +1359,28 @@ Status BlockBasedTable::PutDataBlockToCache(
1330
1359
  if (block_cache_compressed != nullptr &&
1331
1360
  raw_block_comp_type != kNoCompression && raw_block_contents != nullptr &&
1332
1361
  raw_block_contents->own_bytes()) {
1333
- #ifndef NDEBUG
1334
1362
  assert(raw_block_contents->is_raw_block);
1335
- #endif // NDEBUG
1363
+ assert(!cache_key.empty());
1336
1364
 
1337
1365
  // We cannot directly put raw_block_contents because this could point to
1338
1366
  // an object in the stack.
1339
- BlockContents* block_cont_for_comp_cache =
1340
- new BlockContents(std::move(*raw_block_contents));
1341
- s = block_cache_compressed->Insert(
1342
- compressed_block_cache_key, block_cont_for_comp_cache,
1343
- block_cont_for_comp_cache->ApproximateMemoryUsage(),
1344
- &DeleteCachedEntry<BlockContents>);
1367
+ std::unique_ptr<BlockContents> block_cont_for_comp_cache(
1368
+ new BlockContents(std::move(*raw_block_contents)));
1369
+ s = InsertEntryToCache(
1370
+ rep_->ioptions.lowest_used_cache_tier, block_cache_compressed,
1371
+ cache_key,
1372
+ BlocklikeTraits<BlockContents>::GetCacheItemHelper(block_type),
1373
+ block_cont_for_comp_cache,
1374
+ block_cont_for_comp_cache->ApproximateMemoryUsage(), nullptr,
1375
+ Cache::Priority::LOW);
1376
+
1377
+ BlockContents* block_cont_raw_ptr = block_cont_for_comp_cache.release();
1345
1378
  if (s.ok()) {
1346
1379
  // Avoid the following code to delete this cached block.
1347
1380
  RecordTick(statistics, BLOCK_CACHE_COMPRESSED_ADD);
1348
1381
  } else {
1349
1382
  RecordTick(statistics, BLOCK_CACHE_COMPRESSED_ADD_FAILURES);
1350
- delete block_cont_for_comp_cache;
1383
+ delete block_cont_raw_ptr;
1351
1384
  }
1352
1385
  }
1353
1386
 
@@ -1355,16 +1388,17 @@ Status BlockBasedTable::PutDataBlockToCache(
1355
1388
  if (block_cache != nullptr && block_holder->own_bytes()) {
1356
1389
  size_t charge = block_holder->ApproximateMemoryUsage();
1357
1390
  Cache::Handle* cache_handle = nullptr;
1358
- s = block_cache->Insert(block_cache_key, block_holder.get(), charge,
1359
- &DeleteCachedEntry<TBlocklike>, &cache_handle,
1360
- priority);
1391
+ s = InsertEntryToCache(
1392
+ rep_->ioptions.lowest_used_cache_tier, block_cache, cache_key,
1393
+ BlocklikeTraits<TBlocklike>::GetCacheItemHelper(block_type),
1394
+ block_holder, charge, &cache_handle, priority);
1361
1395
  if (s.ok()) {
1362
1396
  assert(cache_handle != nullptr);
1363
1397
  cached_block->SetCachedValue(block_holder.release(), block_cache,
1364
1398
  cache_handle);
1365
1399
 
1366
1400
  UpdateCacheInsertionMetrics(block_type, get_context, charge,
1367
- s.IsOkOverwritten());
1401
+ s.IsOkOverwritten(), rep_->ioptions.stats);
1368
1402
  } else {
1369
1403
  RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES);
1370
1404
  }
@@ -1429,8 +1463,7 @@ DataBlockIter* BlockBasedTable::InitBlockIterator<DataBlockIter>(
1429
1463
  DataBlockIter* input_iter, bool block_contents_pinned) {
1430
1464
  return block->NewDataIterator(rep->internal_comparator.user_comparator(),
1431
1465
  rep->get_global_seqno(block_type), input_iter,
1432
- rep->ioptions.statistics,
1433
- block_contents_pinned);
1466
+ rep->ioptions.stats, block_contents_pinned);
1434
1467
  }
1435
1468
 
1436
1469
  template <>
@@ -1439,7 +1472,7 @@ IndexBlockIter* BlockBasedTable::InitBlockIterator<IndexBlockIter>(
1439
1472
  IndexBlockIter* input_iter, bool block_contents_pinned) {
1440
1473
  return block->NewIndexIterator(
1441
1474
  rep->internal_comparator.user_comparator(),
1442
- rep->get_global_seqno(block_type), input_iter, rep->ioptions.statistics,
1475
+ rep->get_global_seqno(block_type), input_iter, rep->ioptions.stats,
1443
1476
  /* total_order_seek */ true, rep->index_has_first_key,
1444
1477
  rep->index_key_includes_seq, rep->index_value_is_full,
1445
1478
  block_contents_pinned);
@@ -1454,6 +1487,7 @@ template <typename TBlocklike>
1454
1487
  Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
1455
1488
  FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
1456
1489
  const BlockHandle& handle, const UncompressionDict& uncompression_dict,
1490
+ const bool wait, const bool for_compaction,
1457
1491
  CachableEntry<TBlocklike>* block_entry, BlockType block_type,
1458
1492
  GetContext* get_context, BlockCacheLookupContext* lookup_context,
1459
1493
  BlockContents* contents) const {
@@ -1467,39 +1501,40 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
1467
1501
  //
1468
1502
  // If either block cache is enabled, we'll try to read from it.
1469
1503
  Status s;
1470
- char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
1471
- char compressed_cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
1472
- Slice key /* key to the block cache */;
1473
- Slice ckey /* key to the compressed block cache */;
1504
+ CacheKey key_data;
1505
+ Slice key;
1474
1506
  bool is_cache_hit = false;
1475
1507
  if (block_cache != nullptr || block_cache_compressed != nullptr) {
1476
1508
  // create key for block cache
1477
- if (block_cache != nullptr) {
1478
- key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size,
1479
- handle, cache_key);
1480
- }
1481
-
1482
- if (block_cache_compressed != nullptr) {
1483
- ckey = GetCacheKey(rep_->compressed_cache_key_prefix,
1484
- rep_->compressed_cache_key_prefix_size, handle,
1485
- compressed_cache_key);
1486
- }
1509
+ key_data = GetCacheKey(rep_->base_cache_key, handle);
1510
+ key = key_data.AsSlice();
1487
1511
 
1488
1512
  if (!contents) {
1489
- s = GetDataBlockFromCache(key, ckey, block_cache, block_cache_compressed,
1490
- ro, block_entry, uncompression_dict, block_type,
1491
- get_context);
1492
- if (block_entry->GetValue()) {
1513
+ s = GetDataBlockFromCache(key, block_cache, block_cache_compressed, ro,
1514
+ block_entry, uncompression_dict, block_type,
1515
+ wait, get_context);
1516
+ // Value could still be null at this point, so check the cache handle
1517
+ // and update the read pattern for prefetching
1518
+ if (block_entry->GetValue() || block_entry->GetCacheHandle()) {
1493
1519
  // TODO(haoyu): Differentiate cache hit on uncompressed block cache and
1494
1520
  // compressed block cache.
1495
1521
  is_cache_hit = true;
1522
+ if (prefetch_buffer) {
1523
+ // Update the block details so that PrefetchBuffer can use the read
1524
+ // pattern to determine if reads are sequential or not for
1525
+ // prefetching. It should also take in account blocks read from cache.
1526
+ prefetch_buffer->UpdateReadPattern(
1527
+ handle.offset(), BlockSizeWithTrailer(handle),
1528
+ ro.adaptive_readahead /*decrease_readahead_size*/);
1529
+ }
1496
1530
  }
1497
1531
  }
1498
1532
 
1499
1533
  // Can't find the block from the cache. If I/O is allowed, read from the
1500
1534
  // file.
1501
- if (block_entry->GetValue() == nullptr && !no_io && ro.fill_cache) {
1502
- Statistics* statistics = rep_->ioptions.statistics;
1535
+ if (block_entry->GetValue() == nullptr &&
1536
+ block_entry->GetCacheHandle() == nullptr && !no_io && ro.fill_cache) {
1537
+ Statistics* statistics = rep_->ioptions.stats;
1503
1538
  const bool maybe_compressed =
1504
1539
  block_type != BlockType::kFilter &&
1505
1540
  block_type != BlockType::kCompressionDictionary &&
@@ -1508,7 +1543,9 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
1508
1543
  CompressionType raw_block_comp_type;
1509
1544
  BlockContents raw_block_contents;
1510
1545
  if (!contents) {
1511
- StopWatch sw(rep_->ioptions.env, statistics, READ_BLOCK_GET_MICROS);
1546
+ Histograms histogram = for_compaction ? READ_BLOCK_COMPACTION_MICROS
1547
+ : READ_BLOCK_GET_MICROS;
1548
+ StopWatch sw(rep_->ioptions.clock, statistics, histogram);
1512
1549
  BlockFetcher block_fetcher(
1513
1550
  rep_->file.get(), prefetch_buffer, rep_->footer, ro, handle,
1514
1551
  &raw_block_contents, rep_->ioptions, do_uncompress,
@@ -1535,15 +1572,15 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
1535
1572
  }
1536
1573
  }
1537
1574
  } else {
1538
- raw_block_comp_type = contents->get_compression_type();
1575
+ raw_block_comp_type = GetBlockCompressionType(*contents);
1539
1576
  }
1540
1577
 
1541
1578
  if (s.ok()) {
1542
1579
  // If filling cache is allowed and a cache is configured, try to put the
1543
1580
  // block to the cache.
1544
1581
  s = PutDataBlockToCache(
1545
- key, ckey, block_cache, block_cache_compressed, block_entry,
1546
- contents, raw_block_comp_type, uncompression_dict,
1582
+ key, block_cache, block_cache_compressed, block_entry, contents,
1583
+ raw_block_comp_type, uncompression_dict,
1547
1584
  GetMemoryAllocator(rep_->table_options), block_type, get_context);
1548
1585
  }
1549
1586
  }
@@ -1597,7 +1634,7 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
1597
1634
  // Avoid making copy of block_key and cf_name when constructing the access
1598
1635
  // record.
1599
1636
  BlockCacheTraceRecord access_record(
1600
- rep_->ioptions.env->NowMicros(),
1637
+ rep_->ioptions.clock->NowMicros(),
1601
1638
  /*block_key=*/"", trace_block_type,
1602
1639
  /*block_size=*/usage, rep_->cf_id_for_tracing(),
1603
1640
  /*cf_name=*/"", rep_->level_for_tracing(),
@@ -1642,7 +1679,7 @@ void BlockBasedTable::RetrieveMultipleBlocks(
1642
1679
  char* scratch, const UncompressionDict& uncompression_dict) const {
1643
1680
  RandomAccessFileReader* file = rep_->file.get();
1644
1681
  const Footer& footer = rep_->footer;
1645
- const ImmutableCFOptions& ioptions = rep_->ioptions;
1682
+ const ImmutableOptions& ioptions = rep_->ioptions;
1646
1683
  size_t read_amp_bytes_per_bit = rep_->table_options.read_amp_bytes_per_bit;
1647
1684
  MemoryAllocator* memory_allocator = GetMemoryAllocator(rep_->table_options);
1648
1685
 
@@ -1661,7 +1698,8 @@ void BlockBasedTable::RetrieveMultipleBlocks(
1661
1698
  RetrieveBlock(nullptr, options, handle, uncompression_dict,
1662
1699
  &(*results)[idx_in_batch], BlockType::kData,
1663
1700
  mget_iter->get_context, &lookup_data_block_context,
1664
- /* for_compaction */ false, /* use_cache */ true);
1701
+ /* for_compaction */ false, /* use_cache */ true,
1702
+ /* wait_for_cache */ true);
1665
1703
  }
1666
1704
  return;
1667
1705
  }
@@ -1696,7 +1734,7 @@ void BlockBasedTable::RetrieveMultipleBlocks(
1696
1734
  if (use_shared_buffer && !file->use_direct_io() &&
1697
1735
  prev_end == handle.offset()) {
1698
1736
  req_offset_for_block.emplace_back(prev_len);
1699
- prev_len += block_size(handle);
1737
+ prev_len += BlockSizeWithTrailer(handle);
1700
1738
  } else {
1701
1739
  // No compression or current block and previous one is not adjacent:
1702
1740
  // Step 1, create a new request for previous blocks
@@ -1717,10 +1755,13 @@ void BlockBasedTable::RetrieveMultipleBlocks(
1717
1755
 
1718
1756
  // Step 2, remeber the previous block info
1719
1757
  prev_offset = handle.offset();
1720
- prev_len = block_size(handle);
1758
+ prev_len = BlockSizeWithTrailer(handle);
1721
1759
  req_offset_for_block.emplace_back(0);
1722
1760
  }
1723
1761
  req_idx_for_block.emplace_back(read_reqs.size());
1762
+
1763
+ PERF_COUNTER_ADD(block_read_count, 1);
1764
+ PERF_COUNTER_ADD(block_read_byte, BlockSizeWithTrailer(handle));
1724
1765
  }
1725
1766
  // Handle the last block and process the pending last request
1726
1767
  if (prev_len != 0) {
@@ -1740,15 +1781,17 @@ void BlockBasedTable::RetrieveMultipleBlocks(
1740
1781
  AlignedBuf direct_io_buf;
1741
1782
  {
1742
1783
  IOOptions opts;
1743
- IOStatus s = PrepareIOFromReadOptions(options, file->env(), opts);
1744
- if (s.IsTimedOut()) {
1784
+ IOStatus s = file->PrepareIOOptions(options, opts);
1785
+ if (s.ok()) {
1786
+ s = file->MultiRead(opts, &read_reqs[0], read_reqs.size(), &direct_io_buf,
1787
+ options.rate_limiter_priority);
1788
+ }
1789
+ if (!s.ok()) {
1790
+ // Discard all the results in this batch if there is any time out
1791
+ // or overall MultiRead error
1745
1792
  for (FSReadRequest& req : read_reqs) {
1746
1793
  req.status = s;
1747
1794
  }
1748
- } else {
1749
- // How to handle this status code?
1750
- file->MultiRead(opts, &read_reqs[0], read_reqs.size(), &direct_io_buf)
1751
- .PermitUncheckedError();
1752
1795
  }
1753
1796
  }
1754
1797
 
@@ -1775,7 +1818,7 @@ void BlockBasedTable::RetrieveMultipleBlocks(
1775
1818
  Status s = req.status;
1776
1819
  if (s.ok()) {
1777
1820
  if ((req.result.size() != req.len) ||
1778
- (req_offset + block_size(handle) > req.result.size())) {
1821
+ (req_offset + BlockSizeWithTrailer(handle) > req.result.size())) {
1779
1822
  s = Status::Corruption(
1780
1823
  "truncated block read from " + rep_->file->file_name() +
1781
1824
  " offset " + ToString(handle.offset()) + ", expected " +
@@ -1789,7 +1832,7 @@ void BlockBasedTable::RetrieveMultipleBlocks(
1789
1832
  // We allocated a buffer for this block. Give ownership of it to
1790
1833
  // BlockContents so it can free the memory
1791
1834
  assert(req.result.data() == req.scratch);
1792
- assert(req.result.size() == block_size(handle));
1835
+ assert(req.result.size() == BlockSizeWithTrailer(handle));
1793
1836
  assert(req_offset == 0);
1794
1837
  std::unique_ptr<char[]> raw_block(req.scratch);
1795
1838
  raw_block_contents = BlockContents(std::move(raw_block), handle.size());
@@ -1812,9 +1855,9 @@ void BlockBasedTable::RetrieveMultipleBlocks(
1812
1855
  // begin address of each read request, we need to add the offset
1813
1856
  // in each read request. Checksum is stored in the block trailer,
1814
1857
  // beyond the payload size.
1815
- s = ROCKSDB_NAMESPACE::VerifyBlockChecksum(
1816
- footer.checksum(), data + req_offset, handle.size(),
1817
- rep_->file->file_name(), handle.offset());
1858
+ s = VerifyBlockChecksum(footer.checksum_type(), data + req_offset,
1859
+ handle.size(), rep_->file->file_name(),
1860
+ handle.offset());
1818
1861
  TEST_SYNC_POINT_CALLBACK("RetrieveMultipleBlocks:VerifyChecksum", &s);
1819
1862
  }
1820
1863
  } else if (!use_shared_buffer) {
@@ -1835,11 +1878,12 @@ void BlockBasedTable::RetrieveMultipleBlocks(
1835
1878
  // In all other cases, the raw block is either uncompressed into a heap
1836
1879
  // buffer or there is no cache at all.
1837
1880
  CompressionType compression_type =
1838
- raw_block_contents.get_compression_type();
1881
+ GetBlockCompressionType(raw_block_contents);
1839
1882
  if (use_shared_buffer && (compression_type == kNoCompression ||
1840
1883
  (compression_type != kNoCompression &&
1841
1884
  rep_->table_options.block_cache_compressed))) {
1842
- Slice raw = Slice(req.result.data() + req_offset, block_size(handle));
1885
+ Slice raw =
1886
+ Slice(req.result.data() + req_offset, BlockSizeWithTrailer(handle));
1843
1887
  raw_block_contents = BlockContents(
1844
1888
  CopyBufferToHeap(GetMemoryAllocator(rep_->table_options), raw),
1845
1889
  handle.size());
@@ -1858,9 +1902,10 @@ void BlockBasedTable::RetrieveMultipleBlocks(
1858
1902
  // necessary. Since we're passing the raw block contents, it will
1859
1903
  // avoid looking up the block cache
1860
1904
  s = MaybeReadBlockAndLoadToCache(
1861
- nullptr, options, handle, uncompression_dict, block_entry,
1862
- BlockType::kData, mget_iter->get_context,
1863
- &lookup_data_block_context, &raw_block_contents);
1905
+ nullptr, options, handle, uncompression_dict, /*wait=*/true,
1906
+ /*for_compaction=*/false, block_entry, BlockType::kData,
1907
+ mget_iter->get_context, &lookup_data_block_context,
1908
+ &raw_block_contents);
1864
1909
 
1865
1910
  // block_entry value could be null if no block cache is present, i.e
1866
1911
  // BlockBasedTableOptions::no_block_cache is true and no compressed
@@ -1873,14 +1918,14 @@ void BlockBasedTable::RetrieveMultipleBlocks(
1873
1918
  }
1874
1919
 
1875
1920
  CompressionType compression_type =
1876
- raw_block_contents.get_compression_type();
1921
+ GetBlockCompressionType(raw_block_contents);
1877
1922
  BlockContents contents;
1878
1923
  if (compression_type != kNoCompression) {
1879
1924
  UncompressionContext context(compression_type);
1880
1925
  UncompressionInfo info(context, uncompression_dict, compression_type);
1881
- s = UncompressBlockContents(info, req.result.data() + req_offset,
1882
- handle.size(), &contents, footer.version(),
1883
- rep_->ioptions, memory_allocator);
1926
+ s = UncompressBlockContents(
1927
+ info, req.result.data() + req_offset, handle.size(), &contents,
1928
+ footer.format_version(), rep_->ioptions, memory_allocator);
1884
1929
  } else {
1885
1930
  // There are two cases here:
1886
1931
  // 1) caller uses the shared buffer (scratch or direct io buffer);
@@ -1893,7 +1938,7 @@ void BlockBasedTable::RetrieveMultipleBlocks(
1893
1938
  }
1894
1939
  if (s.ok()) {
1895
1940
  (*results)[idx_in_batch].SetOwnedValue(new Block(
1896
- std::move(contents), read_amp_bytes_per_bit, ioptions.statistics));
1941
+ std::move(contents), read_amp_bytes_per_bit, ioptions.stats));
1897
1942
  }
1898
1943
  }
1899
1944
  (*statuses)[idx_in_batch] = s;
@@ -1906,22 +1951,23 @@ Status BlockBasedTable::RetrieveBlock(
1906
1951
  const BlockHandle& handle, const UncompressionDict& uncompression_dict,
1907
1952
  CachableEntry<TBlocklike>* block_entry, BlockType block_type,
1908
1953
  GetContext* get_context, BlockCacheLookupContext* lookup_context,
1909
- bool for_compaction, bool use_cache) const {
1954
+ bool for_compaction, bool use_cache, bool wait_for_cache) const {
1910
1955
  assert(block_entry);
1911
1956
  assert(block_entry->IsEmpty());
1912
1957
 
1913
1958
  Status s;
1914
1959
  if (use_cache) {
1915
- s = MaybeReadBlockAndLoadToCache(prefetch_buffer, ro, handle,
1916
- uncompression_dict, block_entry,
1917
- block_type, get_context, lookup_context,
1918
- /*contents=*/nullptr);
1960
+ s = MaybeReadBlockAndLoadToCache(
1961
+ prefetch_buffer, ro, handle, uncompression_dict, wait_for_cache,
1962
+ for_compaction, block_entry, block_type, get_context, lookup_context,
1963
+ /*contents=*/nullptr);
1919
1964
 
1920
1965
  if (!s.ok()) {
1921
1966
  return s;
1922
1967
  }
1923
1968
 
1924
- if (block_entry->GetValue() != nullptr) {
1969
+ if (block_entry->GetValue() != nullptr ||
1970
+ block_entry->GetCacheHandle() != nullptr) {
1925
1971
  assert(s.ok());
1926
1972
  return s;
1927
1973
  }
@@ -1942,8 +1988,9 @@ Status BlockBasedTable::RetrieveBlock(
1942
1988
  std::unique_ptr<TBlocklike> block;
1943
1989
 
1944
1990
  {
1945
- StopWatch sw(rep_->ioptions.env, rep_->ioptions.statistics,
1946
- READ_BLOCK_GET_MICROS);
1991
+ Histograms histogram =
1992
+ for_compaction ? READ_BLOCK_COMPACTION_MICROS : READ_BLOCK_GET_MICROS;
1993
+ StopWatch sw(rep_->ioptions.clock, rep_->ioptions.stats, histogram);
1947
1994
  s = ReadBlockFromFile(
1948
1995
  rep_->file.get(), prefetch_buffer, rep_->footer, ro, handle, &block,
1949
1996
  rep_->ioptions, do_uncompress, maybe_compressed, block_type,
@@ -1989,32 +2036,32 @@ template Status BlockBasedTable::RetrieveBlock<BlockContents>(
1989
2036
  const BlockHandle& handle, const UncompressionDict& uncompression_dict,
1990
2037
  CachableEntry<BlockContents>* block_entry, BlockType block_type,
1991
2038
  GetContext* get_context, BlockCacheLookupContext* lookup_context,
1992
- bool for_compaction, bool use_cache) const;
2039
+ bool for_compaction, bool use_cache, bool wait_for_cache) const;
1993
2040
 
1994
2041
  template Status BlockBasedTable::RetrieveBlock<ParsedFullFilterBlock>(
1995
2042
  FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
1996
2043
  const BlockHandle& handle, const UncompressionDict& uncompression_dict,
1997
2044
  CachableEntry<ParsedFullFilterBlock>* block_entry, BlockType block_type,
1998
2045
  GetContext* get_context, BlockCacheLookupContext* lookup_context,
1999
- bool for_compaction, bool use_cache) const;
2046
+ bool for_compaction, bool use_cache, bool wait_for_cache) const;
2000
2047
 
2001
2048
  template Status BlockBasedTable::RetrieveBlock<Block>(
2002
2049
  FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
2003
2050
  const BlockHandle& handle, const UncompressionDict& uncompression_dict,
2004
2051
  CachableEntry<Block>* block_entry, BlockType block_type,
2005
2052
  GetContext* get_context, BlockCacheLookupContext* lookup_context,
2006
- bool for_compaction, bool use_cache) const;
2053
+ bool for_compaction, bool use_cache, bool wait_for_cache) const;
2007
2054
 
2008
2055
  template Status BlockBasedTable::RetrieveBlock<UncompressionDict>(
2009
2056
  FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
2010
2057
  const BlockHandle& handle, const UncompressionDict& uncompression_dict,
2011
2058
  CachableEntry<UncompressionDict>* block_entry, BlockType block_type,
2012
2059
  GetContext* get_context, BlockCacheLookupContext* lookup_context,
2013
- bool for_compaction, bool use_cache) const;
2060
+ bool for_compaction, bool use_cache, bool wait_for_cache) const;
2014
2061
 
2015
2062
  BlockBasedTable::PartitionedIndexIteratorState::PartitionedIndexIteratorState(
2016
2063
  const BlockBasedTable* table,
2017
- std::unordered_map<uint64_t, CachableEntry<Block>>* block_map)
2064
+ UnorderedMap<uint64_t, CachableEntry<Block>>* block_map)
2018
2065
  : table_(table), block_map_(block_map) {}
2019
2066
 
2020
2067
  InternalIteratorBase<IndexValue>*
@@ -2022,24 +2069,23 @@ BlockBasedTable::PartitionedIndexIteratorState::NewSecondaryIterator(
2022
2069
  const BlockHandle& handle) {
2023
2070
  // Return a block iterator on the index partition
2024
2071
  auto block = block_map_->find(handle.offset());
2025
- // This is a possible scenario since block cache might not have had space
2026
- // for the partition
2027
- if (block != block_map_->end()) {
2028
- const Rep* rep = table_->get_rep();
2029
- assert(rep);
2030
-
2031
- Statistics* kNullStats = nullptr;
2032
- // We don't return pinned data from index blocks, so no need
2033
- // to set `block_contents_pinned`.
2034
- return block->second.GetValue()->NewIndexIterator(
2035
- rep->internal_comparator.user_comparator(),
2036
- rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true,
2037
- rep->index_has_first_key, rep->index_key_includes_seq,
2038
- rep->index_value_is_full);
2039
- }
2040
- // Create an empty iterator
2041
- // TODO(ajkr): this is not the right way to handle an unpinned partition.
2042
- return new IndexBlockIter();
2072
+ // block_map_ must be exhaustive
2073
+ if (block == block_map_->end()) {
2074
+ assert(false);
2075
+ // Signal problem to caller
2076
+ return nullptr;
2077
+ }
2078
+ const Rep* rep = table_->get_rep();
2079
+ assert(rep);
2080
+
2081
+ Statistics* kNullStats = nullptr;
2082
+ // We don't return pinned data from index blocks, so no need
2083
+ // to set `block_contents_pinned`.
2084
+ return block->second.GetValue()->NewIndexIterator(
2085
+ rep->internal_comparator.user_comparator(),
2086
+ rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true,
2087
+ rep->index_has_first_key, rep->index_key_includes_seq,
2088
+ rep->index_value_is_full);
2043
2089
  }
2044
2090
 
2045
2091
  // This will be broken if the user specifies an unusual implementation
@@ -2156,7 +2202,7 @@ bool BlockBasedTable::PrefixMayMatch(
2156
2202
  }
2157
2203
 
2158
2204
  if (filter_checked) {
2159
- Statistics* statistics = rep_->ioptions.statistics;
2205
+ Statistics* statistics = rep_->ioptions.stats;
2160
2206
  RecordTick(statistics, BLOOM_FILTER_PREFIX_CHECKED);
2161
2207
  if (!may_match) {
2162
2208
  RecordTick(statistics, BLOOM_FILTER_PREFIX_USEFUL);
@@ -2166,6 +2212,17 @@ bool BlockBasedTable::PrefixMayMatch(
2166
2212
  return may_match;
2167
2213
  }
2168
2214
 
2215
+ bool BlockBasedTable::PrefixExtractorChanged(
2216
+ const SliceTransform* prefix_extractor) const {
2217
+ if (prefix_extractor == nullptr) {
2218
+ return true;
2219
+ } else if (prefix_extractor == rep_->table_prefix_extractor.get()) {
2220
+ return false;
2221
+ } else {
2222
+ return PrefixExtractorChangedHelper(rep_->table_properties.get(),
2223
+ prefix_extractor);
2224
+ }
2225
+ }
2169
2226
 
2170
2227
  InternalIterator* BlockBasedTable::NewIterator(
2171
2228
  const ReadOptions& read_options, const SliceTransform* prefix_extractor,
@@ -2173,8 +2230,7 @@ InternalIterator* BlockBasedTable::NewIterator(
2173
2230
  size_t compaction_readahead_size, bool allow_unprepared_value) {
2174
2231
  BlockCacheLookupContext lookup_context{caller};
2175
2232
  bool need_upper_bound_check =
2176
- read_options.auto_prefix_mode ||
2177
- PrefixExtractorChanged(rep_->table_properties.get(), prefix_extractor);
2233
+ read_options.auto_prefix_mode || PrefixExtractorChanged(prefix_extractor);
2178
2234
  std::unique_ptr<InternalIteratorBase<IndexValue>> index_iter(NewIndexIterator(
2179
2235
  read_options,
2180
2236
  need_upper_bound_check &&
@@ -2212,8 +2268,7 @@ FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator(
2212
2268
  }
2213
2269
 
2214
2270
  bool BlockBasedTable::FullFilterKeyMayMatch(
2215
- const ReadOptions& read_options, FilterBlockReader* filter,
2216
- const Slice& internal_key, const bool no_io,
2271
+ FilterBlockReader* filter, const Slice& internal_key, const bool no_io,
2217
2272
  const SliceTransform* prefix_extractor, GetContext* get_context,
2218
2273
  BlockCacheLookupContext* lookup_context) const {
2219
2274
  if (filter == nullptr || filter->IsBlockBased()) {
@@ -2228,26 +2283,25 @@ bool BlockBasedTable::FullFilterKeyMayMatch(
2228
2283
  may_match =
2229
2284
  filter->KeyMayMatch(user_key_without_ts, prefix_extractor, kNotValid,
2230
2285
  no_io, const_ikey_ptr, get_context, lookup_context);
2231
- } else if (!read_options.total_order_seek && prefix_extractor &&
2232
- rep_->table_properties->prefix_extractor_name.compare(
2233
- prefix_extractor->Name()) == 0 &&
2286
+ } else if (!PrefixExtractorChanged(prefix_extractor) &&
2234
2287
  prefix_extractor->InDomain(user_key_without_ts) &&
2235
2288
  !filter->PrefixMayMatch(
2236
2289
  prefix_extractor->Transform(user_key_without_ts),
2237
2290
  prefix_extractor, kNotValid, no_io, const_ikey_ptr,
2238
2291
  get_context, lookup_context)) {
2292
+ // FIXME ^^^: there should be no reason for Get() to depend on current
2293
+ // prefix_extractor at all. It should always use table_prefix_extractor.
2239
2294
  may_match = false;
2240
2295
  }
2241
2296
  if (may_match) {
2242
- RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_POSITIVE);
2297
+ RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_POSITIVE);
2243
2298
  PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, rep_->level);
2244
2299
  }
2245
2300
  return may_match;
2246
2301
  }
2247
2302
 
2248
2303
  void BlockBasedTable::FullFilterKeysMayMatch(
2249
- const ReadOptions& read_options, FilterBlockReader* filter,
2250
- MultiGetRange* range, const bool no_io,
2304
+ FilterBlockReader* filter, MultiGetRange* range, const bool no_io,
2251
2305
  const SliceTransform* prefix_extractor,
2252
2306
  BlockCacheLookupContext* lookup_context) const {
2253
2307
  if (filter == nullptr || filter->IsBlockBased()) {
@@ -2260,28 +2314,26 @@ void BlockBasedTable::FullFilterKeysMayMatch(
2260
2314
  lookup_context);
2261
2315
  uint64_t after_keys = range->KeysLeft();
2262
2316
  if (after_keys) {
2263
- RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_POSITIVE,
2264
- after_keys);
2317
+ RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_POSITIVE, after_keys);
2265
2318
  PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, after_keys,
2266
2319
  rep_->level);
2267
2320
  }
2268
2321
  uint64_t filtered_keys = before_keys - after_keys;
2269
2322
  if (filtered_keys) {
2270
- RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL, filtered_keys);
2323
+ RecordTick(rep_->ioptions.stats, BLOOM_FILTER_USEFUL, filtered_keys);
2271
2324
  PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, filtered_keys,
2272
2325
  rep_->level);
2273
2326
  }
2274
- } else if (!read_options.total_order_seek && prefix_extractor &&
2275
- rep_->table_properties->prefix_extractor_name.compare(
2276
- prefix_extractor->Name()) == 0) {
2327
+ } else if (!PrefixExtractorChanged(prefix_extractor)) {
2328
+ // FIXME ^^^: there should be no reason for MultiGet() to depend on current
2329
+ // prefix_extractor at all. It should always use table_prefix_extractor.
2277
2330
  filter->PrefixesMayMatch(range, prefix_extractor, kNotValid, false,
2278
2331
  lookup_context);
2279
- RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_PREFIX_CHECKED,
2280
- before_keys);
2332
+ RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_CHECKED, before_keys);
2281
2333
  uint64_t after_keys = range->KeysLeft();
2282
2334
  uint64_t filtered_keys = before_keys - after_keys;
2283
2335
  if (filtered_keys) {
2284
- RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_PREFIX_USEFUL,
2336
+ RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_USEFUL,
2285
2337
  filtered_keys);
2286
2338
  }
2287
2339
  }
@@ -2312,12 +2364,11 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
2312
2364
  read_options.snapshot != nullptr;
2313
2365
  }
2314
2366
  TEST_SYNC_POINT("BlockBasedTable::Get:BeforeFilterMatch");
2315
- const bool may_match =
2316
- FullFilterKeyMayMatch(read_options, filter, key, no_io, prefix_extractor,
2317
- get_context, &lookup_context);
2367
+ const bool may_match = FullFilterKeyMayMatch(
2368
+ filter, key, no_io, prefix_extractor, get_context, &lookup_context);
2318
2369
  TEST_SYNC_POINT("BlockBasedTable::Get:AfterFilterMatch");
2319
2370
  if (!may_match) {
2320
- RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
2371
+ RecordTick(rep_->ioptions.stats, BLOOM_FILTER_USEFUL);
2321
2372
  PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level);
2322
2373
  } else {
2323
2374
  IndexBlockIter iiter_on_stack;
@@ -2325,8 +2376,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
2325
2376
  // BlockPrefixIndex. Only do this check when index_type is kHashSearch.
2326
2377
  bool need_upper_bound_check = false;
2327
2378
  if (rep_->index_type == BlockBasedTableOptions::kHashSearch) {
2328
- need_upper_bound_check = PrefixExtractorChanged(
2329
- rep_->table_properties.get(), prefix_extractor);
2379
+ need_upper_bound_check = PrefixExtractorChanged(prefix_extractor);
2330
2380
  }
2331
2381
  auto iiter =
2332
2382
  NewIndexIterator(read_options, need_upper_bound_check, &iiter_on_stack,
@@ -2354,15 +2404,16 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
2354
2404
  // Not found
2355
2405
  // TODO: think about interaction with Merge. If a user key cannot
2356
2406
  // cross one data block, we should be fine.
2357
- RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
2407
+ RecordTick(rep_->ioptions.stats, BLOOM_FILTER_USEFUL);
2358
2408
  PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level);
2359
2409
  break;
2360
2410
  }
2361
2411
 
2362
2412
  if (!v.first_internal_key.empty() && !skip_filters &&
2363
2413
  UserComparatorWrapper(rep_->internal_comparator.user_comparator())
2364
- .Compare(ExtractUserKey(key),
2365
- ExtractUserKey(v.first_internal_key)) < 0) {
2414
+ .CompareWithoutTimestamp(
2415
+ ExtractUserKey(key),
2416
+ ExtractUserKey(v.first_internal_key)) < 0) {
2366
2417
  // The requested key falls between highest key in previous block and
2367
2418
  // lowest key in current block.
2368
2419
  break;
@@ -2385,6 +2436,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
2385
2436
  // Update Saver.state to Found because we are only looking for
2386
2437
  // whether we can guarantee the key is not there when "no_io" is set
2387
2438
  get_context->MarkKeyMayExist();
2439
+ s = biter.status();
2388
2440
  break;
2389
2441
  }
2390
2442
  if (!biter.status().ok()) {
@@ -2435,7 +2487,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
2435
2487
  referenced_key = key;
2436
2488
  }
2437
2489
  BlockCacheTraceRecord access_record(
2438
- rep_->ioptions.env->NowMicros(),
2490
+ rep_->ioptions.clock->NowMicros(),
2439
2491
  /*block_key=*/"", lookup_data_block_context.block_type,
2440
2492
  lookup_data_block_context.block_size, rep_->cf_id_for_tracing(),
2441
2493
  /*cf_name=*/"", rep_->level_for_tracing(),
@@ -2461,7 +2513,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
2461
2513
  }
2462
2514
  }
2463
2515
  if (matched && filter != nullptr && !filter->IsBlockBased()) {
2464
- RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_TRUE_POSITIVE);
2516
+ RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE);
2465
2517
  PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1,
2466
2518
  rep_->level);
2467
2519
  }
@@ -2499,8 +2551,8 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
2499
2551
  BlockCacheLookupContext lookup_context{
2500
2552
  TableReaderCaller::kUserMultiGet, tracing_mget_id,
2501
2553
  /*get_from_user_specified_snapshot=*/read_options.snapshot != nullptr};
2502
- FullFilterKeysMayMatch(read_options, filter, &sst_file_range, no_io,
2503
- prefix_extractor, &lookup_context);
2554
+ FullFilterKeysMayMatch(filter, &sst_file_range, no_io, prefix_extractor,
2555
+ &lookup_context);
2504
2556
 
2505
2557
  if (!sst_file_range.empty()) {
2506
2558
  IndexBlockIter iiter_on_stack;
@@ -2508,8 +2560,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
2508
2560
  // BlockPrefixIndex. Only do this check when index_type is kHashSearch.
2509
2561
  bool need_upper_bound_check = false;
2510
2562
  if (rep_->index_type == BlockBasedTableOptions::kHashSearch) {
2511
- need_upper_bound_check = PrefixExtractorChanged(
2512
- rep_->table_properties.get(), prefix_extractor);
2563
+ need_upper_bound_check = PrefixExtractorChanged(prefix_extractor);
2513
2564
  }
2514
2565
  auto iiter =
2515
2566
  NewIndexIterator(read_options, need_upper_bound_check, &iiter_on_stack,
@@ -2528,6 +2579,8 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
2528
2579
  {
2529
2580
  MultiGetRange data_block_range(sst_file_range, sst_file_range.begin(),
2530
2581
  sst_file_range.end());
2582
+ std::vector<Cache::Handle*> cache_handles;
2583
+ bool wait_for_cache_results = false;
2531
2584
 
2532
2585
  CachableEntry<UncompressionDict> uncompression_dict;
2533
2586
  Status uncompression_dict_status;
@@ -2549,8 +2602,9 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
2549
2602
  if (!iiter->Valid() ||
2550
2603
  (!v.first_internal_key.empty() && !skip_filters &&
2551
2604
  UserComparatorWrapper(rep_->internal_comparator.user_comparator())
2552
- .Compare(ExtractUserKey(key),
2553
- ExtractUserKey(v.first_internal_key)) < 0)) {
2605
+ .CompareWithoutTimestamp(
2606
+ ExtractUserKey(key),
2607
+ ExtractUserKey(v.first_internal_key)) < 0)) {
2554
2608
  // The requested key falls between highest key in previous block and
2555
2609
  // lowest key in current block.
2556
2610
  if (!iiter->status().IsNotFound()) {
@@ -2565,6 +2619,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
2565
2619
  uncompression_dict_status =
2566
2620
  rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
2567
2621
  nullptr /* prefetch_buffer */, no_io,
2622
+ read_options.verify_checksums,
2568
2623
  sst_file_range.begin()->get_context, &lookup_context,
2569
2624
  &uncompression_dict);
2570
2625
  uncompression_dict_inited = true;
@@ -2599,17 +2654,58 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
2599
2654
  Status s = RetrieveBlock(
2600
2655
  nullptr, ro, handle, dict, &(results.back()), BlockType::kData,
2601
2656
  miter->get_context, &lookup_data_block_context,
2602
- /* for_compaction */ false, /* use_cache */ true);
2657
+ /* for_compaction */ false, /* use_cache */ true,
2658
+ /* wait_for_cache */ false);
2603
2659
  if (s.IsIncomplete()) {
2604
2660
  s = Status::OK();
2605
2661
  }
2606
2662
  if (s.ok() && !results.back().IsEmpty()) {
2607
- // Found it in the cache. Add NULL handle to indicate there is
2608
- // nothing to read from disk
2609
- block_handles.emplace_back(BlockHandle::NullBlockHandle());
2663
+ // Since we have a valid handle, check the value. If its nullptr,
2664
+ // it means the cache is waiting for the final result and we're
2665
+ // supposed to call WaitAll() to wait for the result.
2666
+ if (results.back().GetValue() != nullptr) {
2667
+ // Found it in the cache. Add NULL handle to indicate there is
2668
+ // nothing to read from disk.
2669
+ if (results.back().GetCacheHandle()) {
2670
+ results.back().UpdateCachedValue();
2671
+ }
2672
+ block_handles.emplace_back(BlockHandle::NullBlockHandle());
2673
+ } else {
2674
+ // We have to wait for the cache lookup to finish in the
2675
+ // background, and then we may have to read the block from disk
2676
+ // anyway
2677
+ assert(results.back().GetCacheHandle());
2678
+ wait_for_cache_results = true;
2679
+ block_handles.emplace_back(handle);
2680
+ cache_handles.emplace_back(results.back().GetCacheHandle());
2681
+ }
2610
2682
  } else {
2611
2683
  block_handles.emplace_back(handle);
2612
- total_len += block_size(handle);
2684
+ total_len += BlockSizeWithTrailer(handle);
2685
+ }
2686
+ }
2687
+
2688
+ if (wait_for_cache_results) {
2689
+ Cache* block_cache = rep_->table_options.block_cache.get();
2690
+ block_cache->WaitAll(cache_handles);
2691
+ for (size_t i = 0; i < block_handles.size(); ++i) {
2692
+ // If this block was a success or failure or not needed because
2693
+ // the corresponding key is in the same block as a prior key, skip
2694
+ if (block_handles[i] == BlockHandle::NullBlockHandle() ||
2695
+ results[i].IsEmpty()) {
2696
+ continue;
2697
+ }
2698
+ results[i].UpdateCachedValue();
2699
+ void* val = results[i].GetValue();
2700
+ if (!val) {
2701
+ // The async cache lookup failed - could be due to an error
2702
+ // or a false positive. We need to read the data block from
2703
+ // the SST file
2704
+ results[i].Reset();
2705
+ total_len += BlockSizeWithTrailer(block_handles[i]);
2706
+ } else {
2707
+ block_handles[i] = BlockHandle::NullBlockHandle();
2708
+ }
2613
2709
  }
2614
2710
  }
2615
2711
 
@@ -2688,8 +2784,9 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
2688
2784
  IndexValue v = iiter->value();
2689
2785
  if (!v.first_internal_key.empty() && !skip_filters &&
2690
2786
  UserComparatorWrapper(rep_->internal_comparator.user_comparator())
2691
- .Compare(ExtractUserKey(key),
2692
- ExtractUserKey(v.first_internal_key)) < 0) {
2787
+ .CompareWithoutTimestamp(
2788
+ ExtractUserKey(key),
2789
+ ExtractUserKey(v.first_internal_key)) < 0) {
2693
2790
  // The requested key falls between highest key in previous block and
2694
2791
  // lowest key in current block.
2695
2792
  break;
@@ -2771,7 +2868,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
2771
2868
  referenced_key = key;
2772
2869
  }
2773
2870
  BlockCacheTraceRecord access_record(
2774
- rep_->ioptions.env->NowMicros(),
2871
+ rep_->ioptions.clock->NowMicros(),
2775
2872
  /*block_key=*/"", lookup_data_block_context.block_type,
2776
2873
  lookup_data_block_context.block_size, rep_->cf_id_for_tracing(),
2777
2874
  /*cf_name=*/"", rep_->level_for_tracing(),
@@ -2803,7 +2900,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
2803
2900
  } while (iiter->Valid());
2804
2901
 
2805
2902
  if (matched && filter != nullptr && !filter->IsBlockBased()) {
2806
- RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_TRUE_POSITIVE);
2903
+ RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE);
2807
2904
  PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1,
2808
2905
  rep_->level);
2809
2906
  }
@@ -2924,11 +3021,11 @@ Status BlockBasedTable::VerifyChecksumInBlocks(
2924
3021
  // increasing of the buffer size.
2925
3022
  size_t readahead_size = (read_options.readahead_size != 0)
2926
3023
  ? read_options.readahead_size
2927
- : kMaxAutoReadaheadSize;
3024
+ : rep_->table_options.max_auto_readahead_size;
2928
3025
  // FilePrefetchBuffer doesn't work in mmap mode and readahead is not
2929
3026
  // needed there.
2930
3027
  FilePrefetchBuffer prefetch_buffer(
2931
- rep_->file.get(), readahead_size /* readadhead_size */,
3028
+ readahead_size /* readahead_size */,
2932
3029
  readahead_size /* max_readahead_size */,
2933
3030
  !rep_->ioptions.allow_mmap_reads /* enable */);
2934
3031
 
@@ -2940,7 +3037,7 @@ Status BlockBasedTable::VerifyChecksumInBlocks(
2940
3037
  BlockHandle handle = index_iter->value().handle;
2941
3038
  BlockContents contents;
2942
3039
  BlockFetcher block_fetcher(
2943
- rep_->file.get(), &prefetch_buffer, rep_->footer, ReadOptions(), handle,
3040
+ rep_->file.get(), &prefetch_buffer, rep_->footer, read_options, handle,
2944
3041
  &contents, rep_->ioptions, false /* decompress */,
2945
3042
  false /*maybe_compressed*/, BlockType::kData,
2946
3043
  UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options);
@@ -2966,15 +3063,15 @@ BlockType BlockBasedTable::GetBlockTypeForMetaBlockByName(
2966
3063
  return BlockType::kFilter;
2967
3064
  }
2968
3065
 
2969
- if (meta_block_name == kPropertiesBlock) {
3066
+ if (meta_block_name == kPropertiesBlockName) {
2970
3067
  return BlockType::kProperties;
2971
3068
  }
2972
3069
 
2973
- if (meta_block_name == kCompressionDictBlock) {
3070
+ if (meta_block_name == kCompressionDictBlockName) {
2974
3071
  return BlockType::kCompressionDictionary;
2975
3072
  }
2976
3073
 
2977
- if (meta_block_name == kRangeDelBlock) {
3074
+ if (meta_block_name == kRangeDelBlockName) {
2978
3075
  return BlockType::kRangeDeletion;
2979
3076
  }
2980
3077
 
@@ -3003,20 +3100,22 @@ Status BlockBasedTable::VerifyChecksumInMetaBlocks(
3003
3100
  s = handle.DecodeFrom(&input);
3004
3101
  BlockContents contents;
3005
3102
  const Slice meta_block_name = index_iter->key();
3006
- BlockFetcher block_fetcher(
3007
- rep_->file.get(), nullptr /* prefetch buffer */, rep_->footer,
3008
- ReadOptions(), handle, &contents, rep_->ioptions,
3009
- false /* decompress */, false /*maybe_compressed*/,
3010
- GetBlockTypeForMetaBlockByName(meta_block_name),
3011
- UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options);
3012
- s = block_fetcher.ReadBlockContents();
3013
- if (s.IsCorruption() && meta_block_name == kPropertiesBlock) {
3014
- TableProperties* table_properties;
3015
- ReadOptions ro;
3016
- s = TryReadPropertiesWithGlobalSeqno(ro, nullptr /* prefetch_buffer */,
3017
- index_iter->value(),
3018
- &table_properties);
3019
- delete table_properties;
3103
+ if (meta_block_name == kPropertiesBlockName) {
3104
+ // Unfortunate special handling for properties block checksum w/
3105
+ // global seqno
3106
+ std::unique_ptr<TableProperties> table_properties;
3107
+ s = ReadTablePropertiesHelper(ReadOptions(), handle, rep_->file.get(),
3108
+ nullptr /* prefetch_buffer */, rep_->footer,
3109
+ rep_->ioptions, &table_properties,
3110
+ nullptr /* memory_allocator */);
3111
+ } else {
3112
+ s = BlockFetcher(
3113
+ rep_->file.get(), nullptr /* prefetch buffer */, rep_->footer,
3114
+ ReadOptions(), handle, &contents, rep_->ioptions,
3115
+ false /* decompress */, false /*maybe_compressed*/,
3116
+ GetBlockTypeForMetaBlockByName(meta_block_name),
3117
+ UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options)
3118
+ .ReadBlockContents();
3020
3119
  }
3021
3120
  if (!s.ok()) {
3022
3121
  break;
@@ -3033,12 +3132,9 @@ bool BlockBasedTable::TEST_BlockInCache(const BlockHandle& handle) const {
3033
3132
  return false;
3034
3133
  }
3035
3134
 
3036
- char cache_key_storage[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
3037
- Slice cache_key =
3038
- GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size, handle,
3039
- cache_key_storage);
3135
+ CacheKey key = GetCacheKey(rep_->base_cache_key, handle);
3040
3136
 
3041
- Cache::Handle* const cache_handle = cache->Lookup(cache_key);
3137
+ Cache::Handle* const cache_handle = cache->Lookup(key.AsSlice());
3042
3138
  if (cache_handle == nullptr) {
3043
3139
  return false;
3044
3140
  }
@@ -3067,15 +3163,9 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
3067
3163
  // 5. index_type
3068
3164
  Status BlockBasedTable::CreateIndexReader(
3069
3165
  const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
3070
- InternalIterator* preloaded_meta_index_iter, bool use_cache, bool prefetch,
3071
- bool pin, BlockCacheLookupContext* lookup_context,
3166
+ InternalIterator* meta_iter, bool use_cache, bool prefetch, bool pin,
3167
+ BlockCacheLookupContext* lookup_context,
3072
3168
  std::unique_ptr<IndexReader>* index_reader) {
3073
- // kHashSearch requires non-empty prefix_extractor but bypass checking
3074
- // prefix_extractor here since we have no access to MutableCFOptions.
3075
- // Add need_upper_bound_check flag in BlockBasedTable::NewIndexIterator.
3076
- // If prefix_extractor does not match prefix_extractor_name from table
3077
- // properties, turn off Hash Index by setting total_order_seek to true
3078
-
3079
3169
  switch (rep_->index_type) {
3080
3170
  case BlockBasedTableOptions::kTwoLevelIndexSearch: {
3081
3171
  return PartitionIndexReader::Create(this, ro, prefetch_buffer, use_cache,
@@ -3092,25 +3182,13 @@ Status BlockBasedTable::CreateIndexReader(
3092
3182
  case BlockBasedTableOptions::kHashSearch: {
3093
3183
  std::unique_ptr<Block> metaindex_guard;
3094
3184
  std::unique_ptr<InternalIterator> metaindex_iter_guard;
3095
- auto meta_index_iter = preloaded_meta_index_iter;
3096
3185
  bool should_fallback = false;
3186
+ // FIXME: is changed prefix_extractor handled anywhere for hash index?
3097
3187
  if (rep_->internal_prefix_transform.get() == nullptr) {
3098
- ROCKS_LOG_WARN(rep_->ioptions.info_log,
3188
+ ROCKS_LOG_WARN(rep_->ioptions.logger,
3099
3189
  "No prefix extractor passed in. Fall back to binary"
3100
3190
  " search index.");
3101
3191
  should_fallback = true;
3102
- } else if (meta_index_iter == nullptr) {
3103
- auto s = ReadMetaIndexBlock(ro, prefetch_buffer, &metaindex_guard,
3104
- &metaindex_iter_guard);
3105
- if (!s.ok()) {
3106
- // we simply fall back to binary search in case there is any
3107
- // problem with prefix hash index loading.
3108
- ROCKS_LOG_WARN(rep_->ioptions.info_log,
3109
- "Unable to read the metaindex block."
3110
- " Fall back to binary search index.");
3111
- should_fallback = true;
3112
- }
3113
- meta_index_iter = metaindex_iter_guard.get();
3114
3192
  }
3115
3193
 
3116
3194
  if (should_fallback) {
@@ -3118,9 +3196,9 @@ Status BlockBasedTable::CreateIndexReader(
3118
3196
  use_cache, prefetch, pin,
3119
3197
  lookup_context, index_reader);
3120
3198
  } else {
3121
- return HashIndexReader::Create(this, ro, prefetch_buffer,
3122
- meta_index_iter, use_cache, prefetch,
3123
- pin, lookup_context, index_reader);
3199
+ return HashIndexReader::Create(this, ro, prefetch_buffer, meta_iter,
3200
+ use_cache, prefetch, pin, lookup_context,
3201
+ index_reader);
3124
3202
  }
3125
3203
  }
3126
3204
  default: {
@@ -3134,6 +3212,7 @@ Status BlockBasedTable::CreateIndexReader(
3134
3212
  uint64_t BlockBasedTable::ApproximateDataOffsetOf(
3135
3213
  const InternalIteratorBase<IndexValue>& index_iter,
3136
3214
  uint64_t data_size) const {
3215
+ assert(index_iter.status().ok());
3137
3216
  if (index_iter.Valid()) {
3138
3217
  BlockHandle handle = index_iter.value().handle;
3139
3218
  return handle.offset();
@@ -3176,8 +3255,16 @@ uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key,
3176
3255
  }
3177
3256
 
3178
3257
  index_iter->Seek(key);
3258
+ uint64_t offset;
3259
+ if (index_iter->status().ok()) {
3260
+ offset = ApproximateDataOffsetOf(*index_iter, data_size);
3261
+ } else {
3262
+ // Split in half to avoid skewing one way or another,
3263
+ // since we don't know whether we're operating on lower bound or
3264
+ // upper bound.
3265
+ return rep_->file_size / 2;
3266
+ }
3179
3267
 
3180
- uint64_t offset = ApproximateDataOffsetOf(*index_iter, data_size);
3181
3268
  // Pro-rate file metadata (incl filters) size-proportionally across data
3182
3269
  // blocks.
3183
3270
  double size_ratio =
@@ -3193,7 +3280,9 @@ uint64_t BlockBasedTable::ApproximateSize(const Slice& start, const Slice& end,
3193
3280
  uint64_t data_size = GetApproximateDataSize();
3194
3281
  if (UNLIKELY(data_size == 0)) {
3195
3282
  // Hmm. Assume whole file is involved, since we have lower and upper
3196
- // bound.
3283
+ // bound. This likely skews the estimate if we consider that this function
3284
+ // is typically called with `[start, end]` fully contained in the file's
3285
+ // key-range.
3197
3286
  return rep_->file_size;
3198
3287
  }
3199
3288
 
@@ -3211,9 +3300,24 @@ uint64_t BlockBasedTable::ApproximateSize(const Slice& start, const Slice& end,
3211
3300
  }
3212
3301
 
3213
3302
  index_iter->Seek(start);
3214
- uint64_t start_offset = ApproximateDataOffsetOf(*index_iter, data_size);
3303
+ uint64_t start_offset;
3304
+ if (index_iter->status().ok()) {
3305
+ start_offset = ApproximateDataOffsetOf(*index_iter, data_size);
3306
+ } else {
3307
+ // Assume file is involved from the start. This likely skews the estimate
3308
+ // but is consistent with the above error handling.
3309
+ start_offset = 0;
3310
+ }
3311
+
3215
3312
  index_iter->Seek(end);
3216
- uint64_t end_offset = ApproximateDataOffsetOf(*index_iter, data_size);
3313
+ uint64_t end_offset;
3314
+ if (index_iter->status().ok()) {
3315
+ end_offset = ApproximateDataOffsetOf(*index_iter, data_size);
3316
+ } else {
3317
+ // Assume file is involved until the end. This likely skews the estimate
3318
+ // but is consistent with the above error handling.
3319
+ end_offset = data_size;
3320
+ }
3217
3321
 
3218
3322
  assert(end_offset >= start_offset);
3219
3323
  // Pro-rate file metadata (incl filters) size-proportionally across data
@@ -3226,7 +3330,8 @@ uint64_t BlockBasedTable::ApproximateSize(const Slice& start, const Slice& end,
3226
3330
 
3227
3331
  bool BlockBasedTable::TEST_FilterBlockInCache() const {
3228
3332
  assert(rep_ != nullptr);
3229
- return TEST_BlockInCache(rep_->filter_handle);
3333
+ return rep_->filter_type != Rep::FilterType::kNoFilter &&
3334
+ TEST_BlockInCache(rep_->filter_handle);
3230
3335
  }
3231
3336
 
3232
3337
  bool BlockBasedTable::TEST_IndexBlockInCache() const {
@@ -3313,17 +3418,17 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
3313
3418
  if (!s.ok()) {
3314
3419
  return s;
3315
3420
  }
3316
- if (metaindex_iter->key() == kPropertiesBlock) {
3421
+ if (metaindex_iter->key() == kPropertiesBlockName) {
3317
3422
  out_stream << " Properties block handle: "
3318
3423
  << metaindex_iter->value().ToString(true) << "\n";
3319
- } else if (metaindex_iter->key() == kCompressionDictBlock) {
3424
+ } else if (metaindex_iter->key() == kCompressionDictBlockName) {
3320
3425
  out_stream << " Compression dictionary block handle: "
3321
3426
  << metaindex_iter->value().ToString(true) << "\n";
3322
3427
  } else if (strstr(metaindex_iter->key().ToString().c_str(),
3323
3428
  "filter.rocksdb.") != nullptr) {
3324
3429
  out_stream << " Filter block handle: "
3325
3430
  << metaindex_iter->value().ToString(true) << "\n";
3326
- } else if (metaindex_iter->key() == kRangeDelBlock) {
3431
+ } else if (metaindex_iter->key() == kRangeDelBlockName) {
3327
3432
  out_stream << " Range deletion block handle: "
3328
3433
  << metaindex_iter->value().ToString(true) << "\n";
3329
3434
  }
@@ -3360,6 +3465,7 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
3360
3465
  CachableEntry<UncompressionDict> uncompression_dict;
3361
3466
  s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
3362
3467
  nullptr /* prefetch_buffer */, false /* no_io */,
3468
+ false, /* verify_checksums */
3363
3469
  nullptr /* get_context */, nullptr /* lookup_context */,
3364
3470
  &uncompression_dict);
3365
3471
  if (!s.ok()) {