@nxtedition/rocksdb 8.2.8 → 9.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (483) hide show
  1. package/binding.cc +0 -21
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +20 -10
  3. package/deps/rocksdb/rocksdb/Makefile +37 -25
  4. package/deps/rocksdb/rocksdb/README.md +29 -0
  5. package/deps/rocksdb/rocksdb/TARGETS +25 -2
  6. package/deps/rocksdb/rocksdb/cache/cache.cc +35 -0
  7. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +229 -74
  8. package/deps/rocksdb/rocksdb/cache/cache_helpers.cc +2 -1
  9. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +4 -3
  10. package/deps/rocksdb/rocksdb/cache/cache_test.cc +58 -95
  11. package/deps/rocksdb/rocksdb/cache/charged_cache.cc +4 -2
  12. package/deps/rocksdb/rocksdb/cache/charged_cache.h +5 -3
  13. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +2683 -496
  14. package/deps/rocksdb/rocksdb/cache/clock_cache.h +580 -159
  15. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +145 -42
  16. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +20 -1
  17. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +391 -17
  18. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +7 -5
  19. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +309 -212
  20. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +0 -32
  21. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +439 -12
  22. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +44 -2
  23. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +11 -1
  24. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +16 -3
  25. package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache.cc +119 -0
  26. package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache.h +155 -0
  27. package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache_test.cc +711 -0
  28. package/deps/rocksdb/rocksdb/cache/typed_cache.h +17 -11
  29. package/deps/rocksdb/rocksdb/crash_test.mk +14 -0
  30. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +28 -12
  31. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +1 -0
  32. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +2 -1
  33. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -1
  34. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +1 -1
  35. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
  36. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +1 -1
  37. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +20 -22
  38. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +1 -2
  39. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +1 -1
  40. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +2 -3
  41. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +1 -1
  42. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +8 -0
  43. package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +7 -3
  44. package/deps/rocksdb/rocksdb/db/builder.cc +35 -10
  45. package/deps/rocksdb/rocksdb/db/c.cc +233 -6
  46. package/deps/rocksdb/rocksdb/db/c_test.c +140 -6
  47. package/deps/rocksdb/rocksdb/db/column_family.cc +110 -51
  48. package/deps/rocksdb/rocksdb/db/column_family.h +34 -2
  49. package/deps/rocksdb/rocksdb/db/column_family_test.cc +314 -7
  50. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -1
  51. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +106 -23
  52. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +47 -9
  53. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +10 -11
  54. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -6
  55. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +2 -2
  56. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +148 -60
  57. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +22 -7
  58. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -0
  59. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +8 -4
  60. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +33 -23
  61. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +14 -5
  62. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -11
  63. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +3 -0
  64. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +90 -4
  65. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +170 -95
  66. package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +3 -1
  67. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +32 -58
  68. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +3 -1
  69. package/deps/rocksdb/rocksdb/db/convenience.cc +20 -3
  70. package/deps/rocksdb/rocksdb/db/convenience_impl.h +15 -0
  71. package/deps/rocksdb/rocksdb/db/corruption_test.cc +17 -0
  72. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +1 -0
  73. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +46 -10
  74. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +13 -3
  75. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +74 -15
  76. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +27 -3
  77. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +850 -44
  78. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +2 -29
  79. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +275 -1
  80. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +52 -19
  81. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +6 -5
  82. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +733 -320
  83. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +155 -66
  84. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +516 -155
  85. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +8 -4
  86. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +2 -1
  87. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +17 -4
  88. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +100 -35
  89. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +95 -50
  90. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +13 -9
  91. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +136 -79
  92. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +6 -95
  93. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +31 -22
  94. package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +6 -0
  95. package/deps/rocksdb/rocksdb/db/db_iter.cc +85 -57
  96. package/deps/rocksdb/rocksdb/db/db_iter.h +11 -2
  97. package/deps/rocksdb/rocksdb/db/db_iter_test.cc +29 -0
  98. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +276 -21
  99. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +35 -0
  100. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +4 -11
  101. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +193 -7
  102. package/deps/rocksdb/rocksdb/db/db_options_test.cc +294 -26
  103. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +26 -36
  104. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +364 -0
  105. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +13 -3
  106. package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +52 -0
  107. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +74 -1
  108. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +22 -4
  109. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +1 -1
  110. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +1 -0
  111. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +282 -167
  112. package/deps/rocksdb/rocksdb/db/db_test.cc +180 -49
  113. package/deps/rocksdb/rocksdb/db/db_test2.cc +84 -12
  114. package/deps/rocksdb/rocksdb/db/db_test_util.cc +25 -12
  115. package/deps/rocksdb/rocksdb/db/db_test_util.h +45 -2
  116. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +14 -1
  117. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +245 -0
  118. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +480 -1
  119. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +6 -6
  120. package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -2
  121. package/deps/rocksdb/rocksdb/db/dbformat.cc +36 -0
  122. package/deps/rocksdb/rocksdb/db/dbformat.h +169 -20
  123. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +129 -0
  124. package/deps/rocksdb/rocksdb/db/deletefile_test.cc +2 -0
  125. package/deps/rocksdb/rocksdb/db/error_handler.cc +67 -34
  126. package/deps/rocksdb/rocksdb/db/error_handler.h +13 -9
  127. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
  128. package/deps/rocksdb/rocksdb/db/event_helpers.cc +4 -0
  129. package/deps/rocksdb/rocksdb/db/experimental.cc +2 -1
  130. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +4 -4
  131. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +17 -8
  132. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +144 -4
  133. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +1 -1
  134. package/deps/rocksdb/rocksdb/db/file_indexer.cc +2 -4
  135. package/deps/rocksdb/rocksdb/db/flush_job.cc +105 -17
  136. package/deps/rocksdb/rocksdb/db/flush_job.h +27 -4
  137. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +90 -12
  138. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +2 -3
  139. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +159 -91
  140. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +19 -10
  141. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +143 -0
  142. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -1
  143. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  144. package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -1
  145. package/deps/rocksdb/rocksdb/db/log_reader.h +3 -2
  146. package/deps/rocksdb/rocksdb/db/log_test.cc +17 -21
  147. package/deps/rocksdb/rocksdb/db/log_writer.cc +1 -1
  148. package/deps/rocksdb/rocksdb/db/log_writer.h +3 -2
  149. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -3
  150. package/deps/rocksdb/rocksdb/db/memtable.cc +70 -83
  151. package/deps/rocksdb/rocksdb/db/memtable.h +45 -1
  152. package/deps/rocksdb/rocksdb/db/memtable_list.cc +45 -11
  153. package/deps/rocksdb/rocksdb/db/memtable_list.h +43 -2
  154. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +91 -5
  155. package/deps/rocksdb/rocksdb/db/merge_helper.cc +330 -115
  156. package/deps/rocksdb/rocksdb/db/merge_helper.h +100 -12
  157. package/deps/rocksdb/rocksdb/db/merge_operator.cc +82 -0
  158. package/deps/rocksdb/rocksdb/db/merge_test.cc +267 -0
  159. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +5 -2
  160. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.h +4 -4
  161. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +3 -0
  162. package/deps/rocksdb/rocksdb/db/prefix_test.cc +1 -0
  163. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +4 -0
  164. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +4 -0
  165. package/deps/rocksdb/rocksdb/db/repair.cc +25 -7
  166. package/deps/rocksdb/rocksdb/db/repair_test.cc +143 -2
  167. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +459 -74
  168. package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.cc +105 -69
  169. package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.h +83 -46
  170. package/deps/rocksdb/rocksdb/db/table_cache.cc +76 -54
  171. package/deps/rocksdb/rocksdb/db/table_cache.h +18 -12
  172. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
  173. package/deps/rocksdb/rocksdb/db/version_builder.cc +0 -1
  174. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +236 -204
  175. package/deps/rocksdb/rocksdb/db/version_edit.cc +66 -4
  176. package/deps/rocksdb/rocksdb/db/version_edit.h +58 -10
  177. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +80 -8
  178. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +12 -0
  179. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +86 -17
  180. package/deps/rocksdb/rocksdb/db/version_set.cc +207 -110
  181. package/deps/rocksdb/rocksdb/db/version_set.h +36 -15
  182. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +2 -5
  183. package/deps/rocksdb/rocksdb/db/version_set_test.cc +47 -26
  184. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +525 -0
  185. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +6 -22
  186. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +0 -20
  187. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +0 -29
  188. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +46 -0
  189. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.h +40 -0
  190. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper_test.cc +39 -0
  191. package/deps/rocksdb/rocksdb/db/write_batch.cc +55 -20
  192. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
  193. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +16 -0
  194. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
  195. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +4 -4
  196. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +4 -7
  197. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +88 -10
  198. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +37 -13
  199. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +110 -58
  200. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +42 -0
  201. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +68 -17
  202. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +34 -0
  203. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +8 -1
  204. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +429 -237
  205. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +13 -6
  206. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +21 -14
  207. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_wide_merge_operator.cc +51 -0
  208. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_wide_merge_operator.h +27 -0
  209. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +3 -6
  210. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +2 -0
  211. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +29 -38
  212. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +302 -101
  213. package/deps/rocksdb/rocksdb/env/env.cc +6 -2
  214. package/deps/rocksdb/rocksdb/env/env_encryption.cc +11 -165
  215. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +0 -17
  216. package/deps/rocksdb/rocksdb/env/env_posix.cc +6 -2
  217. package/deps/rocksdb/rocksdb/env/env_test.cc +86 -2
  218. package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
  219. package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +79 -0
  220. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +34 -0
  221. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -0
  222. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +15 -4
  223. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +100 -70
  224. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +64 -18
  225. package/deps/rocksdb/rocksdb/file/file_util.cc +10 -5
  226. package/deps/rocksdb/rocksdb/file/file_util.h +13 -1
  227. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +1225 -97
  228. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +72 -33
  229. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -16
  230. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +23 -12
  231. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +3 -0
  232. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +40 -14
  233. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +163 -91
  234. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +112 -2
  235. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +108 -16
  236. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +11 -0
  237. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +3 -0
  238. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +42 -2
  239. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +1 -1
  240. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +92 -12
  241. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +34 -4
  242. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -109
  243. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +91 -13
  244. package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +8 -3
  245. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +10 -4
  246. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +7 -0
  247. package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +1 -1
  248. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +55 -4
  249. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +130 -22
  250. package/deps/rocksdb/rocksdb/include/rocksdb/port_defs.h +4 -0
  251. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +9 -0
  252. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +92 -9
  253. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +2 -1
  254. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +5 -1
  255. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +37 -2
  256. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +35 -0
  257. package/deps/rocksdb/rocksdb/include/rocksdb/system_clock.h +15 -0
  258. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +7 -1
  259. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +20 -3
  260. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +7 -0
  261. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +7 -0
  262. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +6 -1
  263. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +33 -2
  264. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +2 -1
  265. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +14 -0
  266. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +42 -2
  267. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +0 -3
  268. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  269. package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +53 -2
  270. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +3 -2
  271. package/deps/rocksdb/rocksdb/memory/arena_test.cc +18 -11
  272. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +4 -3
  273. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +1 -1
  274. package/deps/rocksdb/rocksdb/microbench/README.md +60 -0
  275. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +69 -34
  276. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +1 -1
  277. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +22 -1
  278. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +18 -7
  279. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +14 -0
  280. package/deps/rocksdb/rocksdb/options/cf_options.cc +19 -0
  281. package/deps/rocksdb/rocksdb/options/cf_options.h +10 -2
  282. package/deps/rocksdb/rocksdb/options/customizable_test.cc +6 -1
  283. package/deps/rocksdb/rocksdb/options/db_options.cc +54 -2
  284. package/deps/rocksdb/rocksdb/options/db_options.h +4 -0
  285. package/deps/rocksdb/rocksdb/options/options.cc +15 -1
  286. package/deps/rocksdb/rocksdb/options/options_helper.cc +18 -0
  287. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +14 -4
  288. package/deps/rocksdb/rocksdb/options/options_test.cc +14 -1
  289. package/deps/rocksdb/rocksdb/plugin/README.md +43 -0
  290. package/deps/rocksdb/rocksdb/port/README +10 -0
  291. package/deps/rocksdb/rocksdb/port/mmap.h +20 -0
  292. package/deps/rocksdb/rocksdb/port/port_example.h +1 -1
  293. package/deps/rocksdb/rocksdb/port/port_posix.cc +1 -1
  294. package/deps/rocksdb/rocksdb/port/port_posix.h +7 -4
  295. package/deps/rocksdb/rocksdb/port/stack_trace.cc +32 -12
  296. package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -1
  297. package/deps/rocksdb/rocksdb/port/win/port_win.h +5 -2
  298. package/deps/rocksdb/rocksdb/src.mk +10 -1
  299. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  300. package/deps/rocksdb/rocksdb/table/block_based/block.cc +48 -22
  301. package/deps/rocksdb/rocksdb/table/block_based/block.h +60 -12
  302. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +116 -43
  303. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +9 -6
  304. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +321 -49
  305. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +98 -4
  306. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +233 -98
  307. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +58 -23
  308. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +12 -8
  309. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +52 -24
  310. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +219 -51
  311. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +41 -8
  312. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -1
  313. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +3 -1
  314. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +26 -7
  315. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +50 -18
  316. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +20 -8
  317. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +232 -71
  318. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -6
  319. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +44 -26
  320. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +2 -1
  321. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
  322. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +31 -16
  323. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +97 -58
  324. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +2 -2
  325. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +6 -0
  326. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +36 -19
  327. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +3 -1
  328. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +114 -70
  329. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -3
  330. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +11 -7
  331. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +15 -3
  332. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +6 -3
  333. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +1 -1
  334. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +14 -13
  335. package/deps/rocksdb/rocksdb/table/block_fetcher.h +4 -0
  336. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +9 -2
  337. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +1 -0
  338. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +6 -2
  339. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +1 -2
  340. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +2 -3
  341. package/deps/rocksdb/rocksdb/table/format.cc +175 -33
  342. package/deps/rocksdb/rocksdb/table/format.h +63 -10
  343. package/deps/rocksdb/rocksdb/table/get_context.cc +52 -89
  344. package/deps/rocksdb/rocksdb/table/get_context.h +12 -3
  345. package/deps/rocksdb/rocksdb/table/internal_iterator.h +11 -0
  346. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +29 -1
  347. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +22 -2
  348. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +12 -4
  349. package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -0
  350. package/deps/rocksdb/rocksdb/table/mock_table.cc +8 -3
  351. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +10 -5
  352. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +10 -1
  353. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +1 -2
  354. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +3 -3
  355. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +45 -9
  356. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +1 -0
  357. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +24 -1
  358. package/deps/rocksdb/rocksdb/table/table_builder.h +6 -2
  359. package/deps/rocksdb/rocksdb/table/table_properties.cc +6 -0
  360. package/deps/rocksdb/rocksdb/table/table_reader.h +6 -0
  361. package/deps/rocksdb/rocksdb/table/table_test.cc +52 -22
  362. package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +31 -0
  363. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.cc +2 -1
  364. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +19 -7
  365. package/deps/rocksdb/rocksdb/test_util/sync_point.h +3 -1
  366. package/deps/rocksdb/rocksdb/test_util/testutil.cc +29 -0
  367. package/deps/rocksdb/rocksdb/test_util/testutil.h +19 -0
  368. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +3 -3
  369. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +87 -65
  370. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +221 -33
  371. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +36 -0
  372. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -1
  373. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -0
  374. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +33 -11
  375. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +4 -0
  376. package/deps/rocksdb/rocksdb/unreleased_history/README.txt +73 -0
  377. package/deps/rocksdb/rocksdb/unreleased_history/add.sh +27 -0
  378. package/deps/rocksdb/rocksdb/unreleased_history/behavior_changes/.gitkeep +0 -0
  379. package/deps/rocksdb/rocksdb/unreleased_history/bug_fixes/.gitkeep +0 -0
  380. package/deps/rocksdb/rocksdb/unreleased_history/new_features/.gitkeep +0 -0
  381. package/deps/rocksdb/rocksdb/unreleased_history/performance_improvements/.gitkeep +0 -0
  382. package/deps/rocksdb/rocksdb/unreleased_history/public_api_changes/.gitkeep +0 -0
  383. package/deps/rocksdb/rocksdb/unreleased_history/release.sh +104 -0
  384. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +5 -0
  385. package/deps/rocksdb/rocksdb/util/bloom_impl.h +3 -3
  386. package/deps/rocksdb/rocksdb/util/bloom_test.cc +32 -11
  387. package/deps/rocksdb/rocksdb/util/cast_util.h +24 -0
  388. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -0
  389. package/deps/rocksdb/rocksdb/util/comparator.cc +55 -8
  390. package/deps/rocksdb/rocksdb/util/compression.cc +4 -4
  391. package/deps/rocksdb/rocksdb/util/compression.h +119 -35
  392. package/deps/rocksdb/rocksdb/util/core_local.h +2 -1
  393. package/deps/rocksdb/rocksdb/util/crc32c.cc +7 -1
  394. package/deps/rocksdb/rocksdb/util/distributed_mutex.h +1 -1
  395. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +4 -4
  396. package/deps/rocksdb/rocksdb/util/filelock_test.cc +3 -0
  397. package/deps/rocksdb/rocksdb/util/hash.h +7 -3
  398. package/deps/rocksdb/rocksdb/util/hash_test.cc +44 -0
  399. package/deps/rocksdb/rocksdb/util/math.h +58 -6
  400. package/deps/rocksdb/rocksdb/util/math128.h +29 -7
  401. package/deps/rocksdb/rocksdb/util/mutexlock.h +35 -27
  402. package/deps/rocksdb/rocksdb/util/overload.h +23 -0
  403. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +53 -18
  404. package/deps/rocksdb/rocksdb/util/rate_limiter_impl.h +6 -1
  405. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +90 -19
  406. package/deps/rocksdb/rocksdb/util/single_thread_executor.h +1 -0
  407. package/deps/rocksdb/rocksdb/util/slice_test.cc +30 -0
  408. package/deps/rocksdb/rocksdb/util/status.cc +1 -0
  409. package/deps/rocksdb/rocksdb/util/stop_watch.h +1 -1
  410. package/deps/rocksdb/rocksdb/util/string_util.cc +39 -0
  411. package/deps/rocksdb/rocksdb/util/string_util.h +10 -0
  412. package/deps/rocksdb/rocksdb/util/thread_operation.h +10 -1
  413. package/deps/rocksdb/rocksdb/util/udt_util.cc +385 -0
  414. package/deps/rocksdb/rocksdb/util/udt_util.h +192 -1
  415. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +461 -0
  416. package/deps/rocksdb/rocksdb/util/write_batch_util.cc +25 -0
  417. package/deps/rocksdb/rocksdb/util/write_batch_util.h +80 -0
  418. package/deps/rocksdb/rocksdb/util/xxhash.h +0 -3
  419. package/deps/rocksdb/rocksdb/util/xxph3.h +0 -4
  420. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -4
  421. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +71 -26
  422. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +7 -6
  423. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +1 -1
  424. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +2 -3
  425. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +6 -11
  426. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -2
  427. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +4 -5
  428. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +1 -0
  429. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +20 -16
  430. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +11 -7
  431. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +2 -2
  432. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +7 -1
  433. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +3 -0
  434. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +12 -3
  435. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +1 -2
  436. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +7 -4
  437. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +2 -3
  438. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +2 -2
  439. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
  440. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +13 -0
  441. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +23 -8
  442. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +9 -6
  443. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +37 -12
  444. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +272 -33
  445. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +15 -9
  446. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +4 -1
  447. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +76 -20
  448. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +18 -9
  449. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +195 -23
  450. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +19 -12
  451. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +88 -1
  452. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -1
  453. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +43 -17
  454. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +6 -3
  455. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +73 -24
  456. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +19 -4
  457. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +60 -107
  458. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +41 -12
  459. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +6 -3
  460. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +15 -8
  461. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h +1 -1
  462. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +10 -5
  463. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  464. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +1 -1
  465. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +59 -28
  466. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +127 -120
  467. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +129 -59
  468. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +111 -14
  469. package/deps/rocksdb/rocksdb.gyp +6 -2
  470. package/index.js +0 -8
  471. package/package.json +1 -1
  472. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  473. package/prebuilds/linux-x64/node.napi.node +0 -0
  474. package/deps/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake +0 -7
  475. package/deps/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake +0 -29
  476. package/deps/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake +0 -29
  477. package/deps/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake +0 -29
  478. package/deps/rocksdb/rocksdb/cmake/modules/FindTBB.cmake +0 -33
  479. package/deps/rocksdb/rocksdb/cmake/modules/Findgflags.cmake +0 -29
  480. package/deps/rocksdb/rocksdb/cmake/modules/Findlz4.cmake +0 -29
  481. package/deps/rocksdb/rocksdb/cmake/modules/Finduring.cmake +0 -26
  482. package/deps/rocksdb/rocksdb/cmake/modules/Findzstd.cmake +0 -29
  483. package/deps/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake +0 -10
@@ -18,10 +18,21 @@ void BlockBasedTableIterator::Seek(const Slice& target) {
18
18
 
19
19
  void BlockBasedTableIterator::SeekImpl(const Slice* target,
20
20
  bool async_prefetch) {
21
- bool is_first_pass = true;
21
+ ResetBlockCacheLookupVar();
22
+ bool is_first_pass = !async_read_in_progress_;
23
+ bool autotune_readaheadsize = is_first_pass &&
24
+ read_options_.auto_readahead_size &&
25
+ read_options_.iterate_upper_bound;
26
+
27
+ if (autotune_readaheadsize &&
28
+ table_->get_rep()->table_options.block_cache.get() &&
29
+ !read_options_.async_io && direction_ == IterDirection::kForward) {
30
+ readahead_cache_lookup_ = true;
31
+ }
32
+
33
+ // Second pass.
22
34
  if (async_read_in_progress_) {
23
35
  AsyncInitDataBlock(false);
24
- is_first_pass = false;
25
36
  }
26
37
 
27
38
  is_out_of_bound_ = false;
@@ -44,7 +55,11 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target,
44
55
  }
45
56
 
46
57
  bool need_seek_index = true;
47
- if (block_iter_points_to_real_block_ && block_iter_.Valid()) {
58
+
59
+ // In case of readahead_cache_lookup_, index_iter_ could change to find the
60
+ // readahead size in BlockCacheLookupForReadAheadSize so it needs to reseek.
61
+ if (IsIndexAtCurr() && block_iter_points_to_real_block_ &&
62
+ block_iter_.Valid()) {
48
63
  // Reseek.
49
64
  prev_block_offset_ = index_iter_->value().handle.offset();
50
65
 
@@ -72,13 +87,31 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target,
72
87
  } else {
73
88
  index_iter_->SeekToFirst();
74
89
  }
90
+ is_index_at_curr_block_ = true;
91
+ if (!index_iter_->Valid()) {
92
+ ResetDataIter();
93
+ return;
94
+ }
95
+ }
96
+
97
+ if (autotune_readaheadsize) {
98
+ FindReadAheadSizeUpperBound();
99
+ if (target) {
100
+ index_iter_->Seek(*target);
101
+ } else {
102
+ index_iter_->SeekToFirst();
103
+ }
75
104
 
105
+ // Check for IO error.
76
106
  if (!index_iter_->Valid()) {
77
107
  ResetDataIter();
78
108
  return;
79
109
  }
80
110
  }
81
111
 
112
+ // After reseek, index_iter_ point to the right key i.e. target in
113
+ // case of readahead_cache_lookup_. So index_iter_ can be used directly.
114
+
82
115
  IndexValue v = index_iter_->value();
83
116
  const bool same_block = block_iter_points_to_real_block_ &&
84
117
  v.handle.offset() == prev_block_offset_;
@@ -135,6 +168,8 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target,
135
168
  }
136
169
 
137
170
  void BlockBasedTableIterator::SeekForPrev(const Slice& target) {
171
+ direction_ = IterDirection::kBackward;
172
+ ResetBlockCacheLookupVar();
138
173
  is_out_of_bound_ = false;
139
174
  is_at_first_key_from_index_ = false;
140
175
  seek_stat_state_ = kNone;
@@ -171,6 +206,7 @@ void BlockBasedTableIterator::SeekForPrev(const Slice& target) {
171
206
  // to distinguish the two unless we read the second block. In this case, we'll
172
207
  // end up with reading two blocks.
173
208
  index_iter_->Seek(target);
209
+ is_index_at_curr_block_ = true;
174
210
 
175
211
  if (!index_iter_->Valid()) {
176
212
  auto seek_status = index_iter_->status();
@@ -206,15 +242,22 @@ void BlockBasedTableIterator::SeekForPrev(const Slice& target) {
206
242
  }
207
243
 
208
244
  void BlockBasedTableIterator::SeekToLast() {
245
+ direction_ = IterDirection::kBackward;
246
+ ResetBlockCacheLookupVar();
209
247
  is_out_of_bound_ = false;
210
248
  is_at_first_key_from_index_ = false;
211
249
  seek_stat_state_ = kNone;
250
+
212
251
  SavePrevIndexValue();
252
+
213
253
  index_iter_->SeekToLast();
254
+ is_index_at_curr_block_ = true;
255
+
214
256
  if (!index_iter_->Valid()) {
215
257
  ResetDataIter();
216
258
  return;
217
259
  }
260
+
218
261
  InitDataBlock();
219
262
  block_iter_.SeekToLast();
220
263
  FindKeyBackward();
@@ -243,6 +286,14 @@ bool BlockBasedTableIterator::NextAndGetResult(IterateResult* result) {
243
286
  }
244
287
 
245
288
  void BlockBasedTableIterator::Prev() {
289
+ // Return Error.
290
+ if (readahead_cache_lookup_) {
291
+ block_iter_.Invalidate(Status::NotSupported(
292
+ "auto tuning of readahead_size is not supported with Prev operation."));
293
+ return;
294
+ }
295
+
296
+ ResetBlockCacheLookupVar();
246
297
  if (is_at_first_key_from_index_) {
247
298
  is_at_first_key_from_index_ = false;
248
299
 
@@ -262,7 +313,18 @@ void BlockBasedTableIterator::Prev() {
262
313
  }
263
314
 
264
315
  void BlockBasedTableIterator::InitDataBlock() {
265
- BlockHandle data_block_handle = index_iter_->value().handle;
316
+ BlockHandle data_block_handle;
317
+ bool is_in_cache = false;
318
+ bool use_block_cache_for_lookup = true;
319
+
320
+ if (DoesContainBlockHandles()) {
321
+ data_block_handle = block_handles_.front().handle_;
322
+ is_in_cache = block_handles_.front().is_cache_hit_;
323
+ use_block_cache_for_lookup = false;
324
+ } else {
325
+ data_block_handle = index_iter_->value().handle;
326
+ }
327
+
266
328
  if (!block_iter_points_to_real_block_ ||
267
329
  data_block_handle.offset() != prev_block_offset_ ||
268
330
  // if previous attempt of reading the block missed cache, try again
@@ -270,25 +332,50 @@ void BlockBasedTableIterator::InitDataBlock() {
270
332
  if (block_iter_points_to_real_block_) {
271
333
  ResetDataIter();
272
334
  }
273
- auto* rep = table_->get_rep();
274
335
 
275
336
  bool is_for_compaction =
276
337
  lookup_context_.caller == TableReaderCaller::kCompaction;
277
- // Prefetch additional data for range scans (iterators).
278
- // Implicit auto readahead:
279
- // Enabled after 2 sequential IOs when ReadOptions.readahead_size == 0.
280
- // Explicit user requested readahead:
281
- // Enabled from the very first IO when ReadOptions.readahead_size is set.
282
- block_prefetcher_.PrefetchIfNeeded(
283
- rep, data_block_handle, read_options_.readahead_size, is_for_compaction,
284
- /*no_sequential_checking=*/false, read_options_.rate_limiter_priority);
285
- Status s;
286
- table_->NewDataBlockIterator<DataBlockIter>(
287
- read_options_, data_block_handle, &block_iter_, BlockType::kData,
288
- /*get_context=*/nullptr, &lookup_context_,
289
- block_prefetcher_.prefetch_buffer(),
290
- /*for_compaction=*/is_for_compaction, /*async_read=*/false, s);
338
+
339
+ // Initialize Data Block From CacheableEntry.
340
+ if (is_in_cache) {
341
+ Status s;
342
+ block_iter_.Invalidate(Status::OK());
343
+ table_->NewDataBlockIterator<DataBlockIter>(
344
+ read_options_, (block_handles_.front().cachable_entry_).As<Block>(),
345
+ &block_iter_, s);
346
+ } else {
347
+ auto* rep = table_->get_rep();
348
+
349
+ std::function<void(uint64_t offset, size_t, size_t&)> readaheadsize_cb =
350
+ nullptr;
351
+ if (readahead_cache_lookup_) {
352
+ readaheadsize_cb = std::bind(
353
+ &BlockBasedTableIterator::BlockCacheLookupForReadAheadSize, this,
354
+ std::placeholders::_1, std::placeholders::_2,
355
+ std::placeholders::_3);
356
+ }
357
+
358
+ // Prefetch additional data for range scans (iterators).
359
+ // Implicit auto readahead:
360
+ // Enabled after 2 sequential IOs when ReadOptions.readahead_size == 0.
361
+ // Explicit user requested readahead:
362
+ // Enabled from the very first IO when ReadOptions.readahead_size is
363
+ // set.
364
+ block_prefetcher_.PrefetchIfNeeded(
365
+ rep, data_block_handle, read_options_.readahead_size,
366
+ is_for_compaction,
367
+ /*no_sequential_checking=*/false, read_options_, readaheadsize_cb);
368
+
369
+ Status s;
370
+ table_->NewDataBlockIterator<DataBlockIter>(
371
+ read_options_, data_block_handle, &block_iter_, BlockType::kData,
372
+ /*get_context=*/nullptr, &lookup_context_,
373
+ block_prefetcher_.prefetch_buffer(),
374
+ /*for_compaction=*/is_for_compaction, /*async_read=*/false, s,
375
+ use_block_cache_for_lookup);
376
+ }
291
377
  block_iter_points_to_real_block_ = true;
378
+
292
379
  CheckDataBlockWithinUpperBound();
293
380
  if (!is_for_compaction &&
294
381
  (seek_stat_state_ & kDataBlockReadSinceLastSeek) == 0) {
@@ -314,6 +401,16 @@ void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) {
314
401
  ResetDataIter();
315
402
  }
316
403
  auto* rep = table_->get_rep();
404
+
405
+ std::function<void(uint64_t offset, size_t, size_t&)> readaheadsize_cb =
406
+ nullptr;
407
+ if (readahead_cache_lookup_) {
408
+ readaheadsize_cb = std::bind(
409
+ &BlockBasedTableIterator::BlockCacheLookupForReadAheadSize, this,
410
+ std::placeholders::_1, std::placeholders::_2,
411
+ std::placeholders::_3);
412
+ }
413
+
317
414
  // Prefetch additional data for range scans (iterators).
318
415
  // Implicit auto readahead:
319
416
  // Enabled after 2 sequential IOs when ReadOptions.readahead_size == 0.
@@ -326,14 +423,15 @@ void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) {
326
423
  block_prefetcher_.PrefetchIfNeeded(
327
424
  rep, data_block_handle, read_options_.readahead_size,
328
425
  is_for_compaction, /*no_sequential_checking=*/read_options_.async_io,
329
- read_options_.rate_limiter_priority);
426
+ read_options_, readaheadsize_cb);
330
427
 
331
428
  Status s;
332
429
  table_->NewDataBlockIterator<DataBlockIter>(
333
430
  read_options_, data_block_handle, &block_iter_, BlockType::kData,
334
431
  /*get_context=*/nullptr, &lookup_context_,
335
432
  block_prefetcher_.prefetch_buffer(),
336
- /*for_compaction=*/is_for_compaction, /*async_read=*/true, s);
433
+ /*for_compaction=*/is_for_compaction, /*async_read=*/true, s,
434
+ /*use_block_cache_for_lookup=*/true);
337
435
 
338
436
  if (s.IsTryAgain()) {
339
437
  async_read_in_progress_ = true;
@@ -348,7 +446,8 @@ void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) {
348
446
  read_options_, data_block_handle, &block_iter_, BlockType::kData,
349
447
  /*get_context=*/nullptr, &lookup_context_,
350
448
  block_prefetcher_.prefetch_buffer(),
351
- /*for_compaction=*/is_for_compaction, /*async_read=*/false, s);
449
+ /*for_compaction=*/is_for_compaction, /*async_read=*/false, s,
450
+ /*use_block_cache_for_lookup=*/false);
352
451
  }
353
452
  block_iter_points_to_real_block_ = true;
354
453
  CheckDataBlockWithinUpperBound();
@@ -379,20 +478,29 @@ bool BlockBasedTableIterator::MaterializeCurrentBlock() {
379
478
 
380
479
  block_iter_.SeekToFirst();
381
480
 
481
+ // MaterializeCurrentBlock is called when block is actually read by
482
+ // calling InitDataBlock. is_at_first_key_from_index_ will be false for block
483
+ // handles placed in blockhandle. So index_ will be pointing to current block.
484
+ // After InitDataBlock, index_iter_ can point to different block if
485
+ // BlockCacheLookupForReadAheadSize is called.
486
+ Slice first_internal_key;
487
+ if (DoesContainBlockHandles()) {
488
+ first_internal_key = block_handles_.front().first_internal_key_;
489
+ } else {
490
+ first_internal_key = index_iter_->value().first_internal_key;
491
+ }
492
+
382
493
  if (!block_iter_.Valid() ||
383
- icomp_.Compare(block_iter_.key(),
384
- index_iter_->value().first_internal_key) != 0) {
494
+ icomp_.Compare(block_iter_.key(), first_internal_key) != 0) {
385
495
  block_iter_.Invalidate(Status::Corruption(
386
496
  "first key in index doesn't match first key in block"));
387
497
  return false;
388
498
  }
389
-
390
499
  return true;
391
500
  }
392
501
 
393
502
  void BlockBasedTableIterator::FindKeyForward() {
394
503
  // This method's code is kept short to make it likely to be inlined.
395
-
396
504
  assert(!is_out_of_bound_);
397
505
  assert(block_iter_points_to_real_block_);
398
506
 
@@ -415,40 +523,72 @@ void BlockBasedTableIterator::FindBlockForward() {
415
523
  return;
416
524
  }
417
525
  // Whether next data block is out of upper bound, if there is one.
418
- const bool next_block_is_out_of_bound =
419
- read_options_.iterate_upper_bound != nullptr &&
526
+ // index_iter_ can point to different block in case of
527
+ // readahead_cache_lookup_. readahead_cache_lookup_ will be handle the
528
+ // upper_bound check.
529
+ bool next_block_is_out_of_bound =
530
+ IsIndexAtCurr() && read_options_.iterate_upper_bound != nullptr &&
420
531
  block_iter_points_to_real_block_ &&
421
532
  block_upper_bound_check_ == BlockUpperBound::kUpperBoundInCurBlock;
533
+
422
534
  assert(!next_block_is_out_of_bound ||
423
535
  user_comparator_.CompareWithoutTimestamp(
424
536
  *read_options_.iterate_upper_bound, /*a_has_ts=*/false,
425
537
  index_iter_->user_key(), /*b_has_ts=*/true) <= 0);
538
+
426
539
  ResetDataIter();
427
- index_iter_->Next();
428
- if (next_block_is_out_of_bound) {
429
- // The next block is out of bound. No need to read it.
430
- TEST_SYNC_POINT_CALLBACK("BlockBasedTableIterator:out_of_bound", nullptr);
431
- // We need to make sure this is not the last data block before setting
432
- // is_out_of_bound_, since the index key for the last data block can be
433
- // larger than smallest key of the next file on the same level.
434
- if (index_iter_->Valid()) {
435
- is_out_of_bound_ = true;
436
- }
437
- return;
438
- }
439
540
 
440
- if (!index_iter_->Valid()) {
441
- return;
541
+ if (DoesContainBlockHandles()) {
542
+ // Advance and point to that next Block handle to make that block handle
543
+ // current.
544
+ block_handles_.pop_front();
442
545
  }
443
546
 
444
- IndexValue v = index_iter_->value();
547
+ if (!DoesContainBlockHandles()) {
548
+ // For readahead_cache_lookup_ enabled scenario -
549
+ // 1. In case of Seek, block_handle will be empty and it should be follow
550
+ // as usual doing index_iter_->Next().
551
+ // 2. If block_handles is empty and index is not at current because of
552
+ // lookup (during Next), it should skip doing index_iter_->Next(), as
553
+ // it's already pointing to next block;
554
+ // 3. Last block could be out of bound and it won't iterate over that
555
+ // during BlockCacheLookup. We need to set for that block here.
556
+ if (IsIndexAtCurr() || is_index_out_of_bound_) {
557
+ index_iter_->Next();
558
+ if (is_index_out_of_bound_) {
559
+ next_block_is_out_of_bound = is_index_out_of_bound_;
560
+ is_index_out_of_bound_ = false;
561
+ }
562
+ } else {
563
+ // Skip Next as index_iter_ already points to correct index when it
564
+ // iterates in BlockCacheLookupForReadAheadSize.
565
+ is_index_at_curr_block_ = true;
566
+ }
445
567
 
446
- if (!v.first_internal_key.empty() && allow_unprepared_value_) {
447
- // Index contains the first key of the block. Defer reading the block.
448
- is_at_first_key_from_index_ = true;
449
- return;
450
- }
568
+ if (next_block_is_out_of_bound) {
569
+ // The next block is out of bound. No need to read it.
570
+ TEST_SYNC_POINT_CALLBACK("BlockBasedTableIterator:out_of_bound",
571
+ nullptr);
572
+ // We need to make sure this is not the last data block before setting
573
+ // is_out_of_bound_, since the index key for the last data block can be
574
+ // larger than smallest key of the next file on the same level.
575
+ if (index_iter_->Valid()) {
576
+ is_out_of_bound_ = true;
577
+ }
578
+ return;
579
+ }
580
+
581
+ if (!index_iter_->Valid()) {
582
+ return;
583
+ }
584
+ IndexValue v = index_iter_->value();
451
585
 
586
+ if (!v.first_internal_key.empty() && allow_unprepared_value_) {
587
+ // Index contains the first key of the block. Defer reading the block.
588
+ is_at_first_key_from_index_ = true;
589
+ return;
590
+ }
591
+ }
452
592
  InitDataBlock();
453
593
  block_iter_.SeekToFirst();
454
594
  } while (!block_iter_.Valid());
@@ -487,7 +627,7 @@ void BlockBasedTableIterator::CheckOutOfBound() {
487
627
  }
488
628
 
489
629
  void BlockBasedTableIterator::CheckDataBlockWithinUpperBound() {
490
- if (read_options_.iterate_upper_bound != nullptr &&
630
+ if (IsIndexAtCurr() && read_options_.iterate_upper_bound != nullptr &&
491
631
  block_iter_points_to_real_block_) {
492
632
  block_upper_bound_check_ = (user_comparator_.CompareWithoutTimestamp(
493
633
  *read_options_.iterate_upper_bound,
@@ -497,4 +637,136 @@ void BlockBasedTableIterator::CheckDataBlockWithinUpperBound() {
497
637
  : BlockUpperBound::kUpperBoundInCurBlock;
498
638
  }
499
639
  }
640
+
641
+ void BlockBasedTableIterator::FindReadAheadSizeUpperBound() {
642
+ size_t total_bytes_till_upper_bound = 0;
643
+ size_t footer = table_->get_rep()->footer.GetBlockTrailerSize();
644
+ uint64_t start_offset = index_iter_->value().handle.offset();
645
+
646
+ do {
647
+ BlockHandle block_handle = index_iter_->value().handle;
648
+ total_bytes_till_upper_bound += block_handle.size();
649
+ total_bytes_till_upper_bound += footer;
650
+
651
+ // Can't figure out for current block if current block
652
+ // is out of bound. But for next block we can find that.
653
+ // If curr block's index key >= iterate_upper_bound, it
654
+ // means all the keys in next block or above are out of
655
+ // bound.
656
+ if (IsNextBlockOutOfBound()) {
657
+ break;
658
+ }
659
+
660
+ // Since next block is not out of bound, iterate to that
661
+ // index block and add it's Data block size to
662
+ // readahead_size.
663
+ index_iter_->Next();
664
+
665
+ if (!index_iter_->Valid()) {
666
+ break;
667
+ }
668
+
669
+ } while (true);
670
+
671
+ block_prefetcher_.SetUpperBoundOffset(start_offset +
672
+ total_bytes_till_upper_bound);
673
+ }
674
+
675
+ void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize(
676
+ uint64_t offset, size_t readahead_size, size_t& updated_readahead_size) {
677
+ updated_readahead_size = readahead_size;
678
+
679
+ // readahead_cache_lookup_ can be set false after Seek, if after Seek or Next
680
+ // there is SeekForPrev or any other backward operation.
681
+ if (!readahead_cache_lookup_) {
682
+ return;
683
+ }
684
+
685
+ assert(!DoesContainBlockHandles());
686
+ assert(index_iter_->value().handle.offset() == offset);
687
+
688
+ // Error. current offset should be equal to what's requested for prefetching.
689
+ if (index_iter_->value().handle.offset() != offset) {
690
+ return;
691
+ }
692
+
693
+ if (IsNextBlockOutOfBound()) {
694
+ updated_readahead_size = 0;
695
+ return;
696
+ }
697
+
698
+ size_t current_readahead_size = 0;
699
+ size_t footer = table_->get_rep()->footer.GetBlockTrailerSize();
700
+
701
+ // Add the current block to block_handles_.
702
+ {
703
+ BlockHandleInfo block_handle_info;
704
+ block_handle_info.handle_ = index_iter_->value().handle;
705
+ block_handle_info.SetFirstInternalKey(
706
+ index_iter_->value().first_internal_key);
707
+ block_handles_.emplace_back(std::move(block_handle_info));
708
+ }
709
+
710
+ // Current block is included in length. Readahead should start from next
711
+ // block.
712
+ index_iter_->Next();
713
+ is_index_at_curr_block_ = false;
714
+
715
+ while (index_iter_->Valid()) {
716
+ BlockHandle block_handle = index_iter_->value().handle;
717
+
718
+ // Adding this data block exceeds passed down readahead_size. So this data
719
+ // block won't be added.
720
+ if (current_readahead_size + block_handle.size() + footer >
721
+ readahead_size) {
722
+ break;
723
+ }
724
+
725
+ current_readahead_size += block_handle.size();
726
+ current_readahead_size += footer;
727
+
728
+ // For current data block, do the lookup in the cache. Lookup should pin the
729
+ // data block and add the placeholder for cache.
730
+ BlockHandleInfo block_handle_info;
731
+ block_handle_info.handle_ = index_iter_->value().handle;
732
+ block_handle_info.SetFirstInternalKey(
733
+ index_iter_->value().first_internal_key);
734
+
735
+ Status s = table_->LookupAndPinBlocksInCache<Block_kData>(
736
+ read_options_, block_handle,
737
+ &(block_handle_info.cachable_entry_).As<Block_kData>());
738
+ if (!s.ok()) {
739
+ break;
740
+ }
741
+
742
+ block_handle_info.is_cache_hit_ =
743
+ (block_handle_info.cachable_entry_.GetValue() ||
744
+ block_handle_info.cachable_entry_.GetCacheHandle());
745
+
746
+ // Add the handle to the queue.
747
+ block_handles_.emplace_back(std::move(block_handle_info));
748
+
749
+ // Can't figure out for current block if current block
750
+ // is out of bound. But for next block we can find that.
751
+ // If curr block's index key >= iterate_upper_bound, it
752
+ // means all the keys in next block or above are out of
753
+ // bound.
754
+ if (IsNextBlockOutOfBound()) {
755
+ is_index_out_of_bound_ = true;
756
+ break;
757
+ }
758
+ index_iter_->Next();
759
+ };
760
+
761
+ // Iterate cache hit block handles from the end till a Miss is there, to
762
+ // update the readahead_size.
763
+ for (auto it = block_handles_.rbegin();
764
+ it != block_handles_.rend() && (*it).is_cache_hit_ == true; ++it) {
765
+ current_readahead_size -= (*it).handle_.size();
766
+ current_readahead_size -= footer;
767
+ }
768
+ updated_readahead_size = current_readahead_size;
769
+ ResetPreviousBlockOffset();
770
+ }
771
+
500
772
  } // namespace ROCKSDB_NAMESPACE
@@ -7,6 +7,8 @@
7
7
  // Use of this source code is governed by a BSD-style license that can be
8
8
  // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
9
  #pragma once
10
+ #include <deque>
11
+
10
12
  #include "table/block_based/block_based_table_reader.h"
11
13
  #include "table/block_based/block_based_table_reader_impl.h"
12
14
  #include "table/block_based/block_prefetcher.h"
@@ -44,7 +46,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
44
46
  async_read_in_progress_(false),
45
47
  is_last_level_(table->IsLastLevel()) {}
46
48
 
47
- ~BlockBasedTableIterator() {}
49
+ ~BlockBasedTableIterator() override { ClearBlockHandles(); }
48
50
 
49
51
  void Seek(const Slice& target) override;
50
52
  void SeekForPrev(const Slice& target) override;
@@ -58,6 +60,11 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
58
60
  (is_at_first_key_from_index_ ||
59
61
  (block_iter_points_to_real_block_ && block_iter_.Valid()));
60
62
  }
63
+
64
+ // For block cache readahead lookup scenario -
65
+ // If is_at_first_key_from_index_ is true, InitDataBlock hasn't been
66
+ // called. It means block_handles is empty and index_ point to current block.
67
+ // So index_iter_ can be accessed directly.
61
68
  Slice key() const override {
62
69
  assert(Valid());
63
70
  if (is_at_first_key_from_index_) {
@@ -74,6 +81,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
74
81
  return block_iter_.user_key();
75
82
  }
76
83
  }
84
+
77
85
  bool PrepareValue() override {
78
86
  assert(Valid());
79
87
 
@@ -104,8 +112,12 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
104
112
  return block_iter_.value();
105
113
  }
106
114
  Status status() const override {
107
- // Prefix index set status to NotFound when the prefix does not exist
108
- if (!index_iter_->status().ok() && !index_iter_->status().IsNotFound()) {
115
+ // In case of block cache readahead lookup, it won't add the block to
116
+ // block_handles if it's index is invalid. So index_iter_->status check can
117
+ // be skipped.
118
+ // Prefix index set status to NotFound when the prefix does not exist.
119
+ if (IsIndexAtCurr() && !index_iter_->status().ok() &&
120
+ !index_iter_->status().IsNotFound()) {
109
121
  return index_iter_->status();
110
122
  } else if (block_iter_points_to_real_block_) {
111
123
  return block_iter_.status();
@@ -159,7 +171,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
159
171
  }
160
172
 
161
173
  void SavePrevIndexValue() {
162
- if (block_iter_points_to_real_block_) {
174
+ if (block_iter_points_to_real_block_ && IsIndexAtCurr()) {
163
175
  // Reseek. If they end up with the same data block, we shouldn't re-fetch
164
176
  // the same data block.
165
177
  prev_block_offset_ = index_iter_->value().handle.offset();
@@ -235,6 +247,28 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
235
247
  kReportOnUseful = 1 << 2,
236
248
  };
237
249
 
250
+ // BlockHandleInfo is used to store the info needed when block cache lookup
251
+ // ahead is enabled to tune readahead_size.
252
+ struct BlockHandleInfo {
253
+ void SetFirstInternalKey(const Slice& key) {
254
+ if (key.empty()) {
255
+ return;
256
+ }
257
+ size_t size = key.size();
258
+ buf_ = std::unique_ptr<char[]>(new char[size]);
259
+ memcpy(buf_.get(), key.data(), size);
260
+ first_internal_key_ = Slice(buf_.get(), size);
261
+ }
262
+
263
+ BlockHandle handle_;
264
+ bool is_cache_hit_ = false;
265
+ CachableEntry<Block> cachable_entry_;
266
+ Slice first_internal_key_;
267
+ std::unique_ptr<char[]> buf_;
268
+ };
269
+
270
+ bool IsIndexAtCurr() const { return is_index_at_curr_block_; }
271
+
238
272
  const BlockBasedTable* table_;
239
273
  const ReadOptions& read_options_;
240
274
  const InternalKeyComparator& icomp_;
@@ -268,6 +302,29 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
268
302
  mutable SeekStatState seek_stat_state_ = SeekStatState::kNone;
269
303
  bool is_last_level_;
270
304
 
305
+ // If set to true, it'll lookup in the cache ahead to estimate the readahead
306
+ // size based on cache hit and miss.
307
+ bool readahead_cache_lookup_ = false;
308
+
309
+ // It stores all the block handles that are lookuped in cache ahead when
310
+ // BlockCacheLookupForReadAheadSize is called. Since index_iter_ may point to
311
+ // different blocks when readahead_size is calculated in
312
+ // BlockCacheLookupForReadAheadSize, to avoid index_iter_ reseek,
313
+ // block_handles_ is used.
314
+ std::deque<BlockHandleInfo> block_handles_;
315
+
316
+ // During cache lookup to find readahead size, index_iter_ is iterated and it
317
+ // can point to a different block. is_index_at_curr_block_ keeps track of
318
+ // that.
319
+ bool is_index_at_curr_block_ = true;
320
+ bool is_index_out_of_bound_ = false;
321
+
322
+ // Used in case of auto_readahead_size to disable the block_cache lookup if
323
+ // direction is reversed from forward to backward. In case of backward
324
+ // direction, SeekForPrev or Prev might call Seek from db_iter. So direction
325
+ // is used to disable the lookup.
326
+ IterDirection direction_ = IterDirection::kForward;
327
+
271
328
  // If `target` is null, seek to first.
272
329
  void SeekImpl(const Slice* target, bool async_prefetch);
273
330
 
@@ -306,5 +363,42 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
306
363
  }
307
364
  return true;
308
365
  }
366
+
367
+ // *** BEGIN APIs relevant to auto tuning of readahead_size ***
368
+ void FindReadAheadSizeUpperBound();
369
+
370
+ // This API is called to lookup the data blocks ahead in the cache to estimate
371
+ // the current readahead_size.
372
+ void BlockCacheLookupForReadAheadSize(uint64_t offset, size_t readahead_size,
373
+ size_t& updated_readahead_size);
374
+
375
+ void ResetBlockCacheLookupVar() {
376
+ is_index_out_of_bound_ = false;
377
+ readahead_cache_lookup_ = false;
378
+ ClearBlockHandles();
379
+ }
380
+
381
+ bool IsNextBlockOutOfBound() {
382
+ // If curr block's index key >= iterate_upper_bound, it means all the keys
383
+ // in next block or above are out of bound.
384
+ return (user_comparator_.CompareWithoutTimestamp(
385
+ index_iter_->user_key(),
386
+ /*a_has_ts=*/true, *read_options_.iterate_upper_bound,
387
+ /*b_has_ts=*/false) >= 0
388
+ ? true
389
+ : false);
390
+ }
391
+
392
+ void ClearBlockHandles() { block_handles_.clear(); }
393
+
394
+ // Reset prev_block_offset_. If index_iter_ has moved ahead, it won't get
395
+ // accurate prev_block_offset_.
396
+ void ResetPreviousBlockOffset() {
397
+ prev_block_offset_ = std::numeric_limits<uint64_t>::max();
398
+ }
399
+
400
+ bool DoesContainBlockHandles() { return !block_handles_.empty(); }
401
+
402
+ // *** END APIs relevant to auto tuning of readahead_size ***
309
403
  };
310
404
  } // namespace ROCKSDB_NAMESPACE