rocksdb-native 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. package/binding.c +92 -10
  2. package/index.js +9 -0
  3. package/lib/batch.js +11 -1
  4. package/lib/iterator.js +3 -1
  5. package/lib/snapshot.js +21 -0
  6. package/package.json +1 -1
  7. package/prebuilds/darwin-arm64/rocksdb-native.bare +0 -0
  8. package/prebuilds/darwin-arm64/rocksdb-native.node +0 -0
  9. package/prebuilds/darwin-x64/rocksdb-native.bare +0 -0
  10. package/prebuilds/darwin-x64/rocksdb-native.node +0 -0
  11. package/prebuilds/linux-arm64/rocksdb-native.bare +0 -0
  12. package/prebuilds/linux-arm64/rocksdb-native.node +0 -0
  13. package/prebuilds/linux-x64/rocksdb-native.bare +0 -0
  14. package/prebuilds/linux-x64/rocksdb-native.node +0 -0
  15. package/prebuilds/win32-x64/rocksdb-native.bare +0 -0
  16. package/prebuilds/win32-x64/rocksdb-native.node +0 -0
  17. package/vendor/librocksdb/include/rocksdb.h +38 -4
  18. package/vendor/librocksdb/src/rocksdb.cc +114 -14
  19. package/vendor/librocksdb/vendor/rocksdb/CMakeLists.txt +21 -4
  20. package/vendor/librocksdb/vendor/rocksdb/cache/secondary_cache_adapter.cc +6 -3
  21. package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.cc +4 -4
  22. package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.h +4 -2
  23. package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.cc +20 -0
  24. package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.h +83 -0
  25. package/vendor/librocksdb/vendor/rocksdb/db/builder.cc +9 -5
  26. package/vendor/librocksdb/vendor/rocksdb/db/builder.h +1 -1
  27. package/vendor/librocksdb/vendor/rocksdb/db/c.cc +231 -6
  28. package/vendor/librocksdb/vendor/rocksdb/db/c_test.c +202 -2
  29. package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.cc +47 -0
  30. package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.h +79 -0
  31. package/vendor/librocksdb/vendor/rocksdb/db/column_family.cc +28 -0
  32. package/vendor/librocksdb/vendor/rocksdb/db/column_family.h +17 -0
  33. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.cc +8 -1
  34. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.h +11 -9
  35. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.cc +50 -23
  36. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.h +13 -0
  37. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.cc +22 -25
  38. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.h +2 -0
  39. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.cc +8 -1
  40. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.h +1 -0
  41. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.cc +40 -17
  42. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.h +20 -14
  43. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_level.cc +11 -6
  44. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_universal.cc +77 -24
  45. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_service_job.cc +2 -0
  46. package/vendor/librocksdb/vendor/rocksdb/db/convenience.cc +3 -0
  47. package/vendor/librocksdb/vendor/rocksdb/db/db_filesnapshot.cc +125 -31
  48. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.cc +457 -231
  49. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.h +172 -73
  50. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_compaction_flush.cc +152 -133
  51. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  52. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_files.cc +58 -52
  53. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.cc +348 -0
  54. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.h +54 -0
  55. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_open.cc +136 -117
  56. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.cc +4 -3
  57. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.h +7 -6
  58. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_write.cc +134 -80
  59. package/vendor/librocksdb/vendor/rocksdb/db/db_iter.cc +11 -0
  60. package/vendor/librocksdb/vendor/rocksdb/db/db_test2.cc +1 -1
  61. package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.cc +11 -1
  62. package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.h +11 -7
  63. package/vendor/librocksdb/vendor/rocksdb/db/dbformat.cc +19 -4
  64. package/vendor/librocksdb/vendor/rocksdb/db/dbformat.h +3 -2
  65. package/vendor/librocksdb/vendor/rocksdb/db/error_handler.cc +34 -39
  66. package/vendor/librocksdb/vendor/rocksdb/db/error_handler.h +3 -4
  67. package/vendor/librocksdb/vendor/rocksdb/db/event_helpers.cc +6 -3
  68. package/vendor/librocksdb/vendor/rocksdb/db/experimental.cc +3 -2
  69. package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.cc +76 -18
  70. package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.h +11 -0
  71. package/vendor/librocksdb/vendor/rocksdb/db/flush_job.cc +37 -5
  72. package/vendor/librocksdb/vendor/rocksdb/db/flush_job.h +14 -0
  73. package/vendor/librocksdb/vendor/rocksdb/db/import_column_family_job.cc +49 -45
  74. package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.cc +60 -1
  75. package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.h +20 -1
  76. package/vendor/librocksdb/vendor/rocksdb/db/log_reader.cc +15 -6
  77. package/vendor/librocksdb/vendor/rocksdb/db/log_writer.cc +59 -10
  78. package/vendor/librocksdb/vendor/rocksdb/db/log_writer.h +8 -0
  79. package/vendor/librocksdb/vendor/rocksdb/db/memtable.cc +24 -40
  80. package/vendor/librocksdb/vendor/rocksdb/db/memtable.h +10 -10
  81. package/vendor/librocksdb/vendor/rocksdb/db/memtable_list.cc +9 -8
  82. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator_impl.h +296 -0
  83. package/vendor/librocksdb/vendor/rocksdb/db/range_tombstone_fragmenter.h +8 -10
  84. package/vendor/librocksdb/vendor/rocksdb/db/repair.cc +4 -3
  85. package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.cc +30 -0
  86. package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.h +9 -0
  87. package/vendor/librocksdb/vendor/rocksdb/db/table_cache.cc +17 -2
  88. package/vendor/librocksdb/vendor/rocksdb/db/table_cache.h +9 -1
  89. package/vendor/librocksdb/vendor/rocksdb/db/table_properties_collector.h +9 -2
  90. package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.cc +3 -3
  91. package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.h +7 -7
  92. package/vendor/librocksdb/vendor/rocksdb/db/version_edit.cc +0 -1
  93. package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.cc +39 -5
  94. package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.h +24 -15
  95. package/vendor/librocksdb/vendor/rocksdb/db/version_set.cc +117 -64
  96. package/vendor/librocksdb/vendor/rocksdb/db/version_set.h +27 -10
  97. package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.cc +37 -29
  98. package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.h +6 -5
  99. package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns.cc +2 -3
  100. package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns_helper.cc +6 -0
  101. package/vendor/librocksdb/vendor/rocksdb/db/write_batch.cc +89 -31
  102. package/vendor/librocksdb/vendor/rocksdb/db/write_thread.cc +53 -5
  103. package/vendor/librocksdb/vendor/rocksdb/db/write_thread.h +36 -4
  104. package/vendor/librocksdb/vendor/rocksdb/env/composite_env_wrapper.h +21 -0
  105. package/vendor/librocksdb/vendor/rocksdb/env/env.cc +15 -0
  106. package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.cc +331 -0
  107. package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.h +139 -0
  108. package/vendor/librocksdb/vendor/rocksdb/env/io_posix.cc +8 -6
  109. package/vendor/librocksdb/vendor/rocksdb/env/io_posix.h +1 -1
  110. package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.cc +130 -27
  111. package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.h +61 -8
  112. package/vendor/librocksdb/vendor/rocksdb/file/file_util.cc +25 -4
  113. package/vendor/librocksdb/vendor/rocksdb/file/file_util.h +15 -0
  114. package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.cc +1 -0
  115. package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.h +9 -4
  116. package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.cc +18 -0
  117. package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.h +31 -4
  118. package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.cc +40 -38
  119. package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.h +48 -15
  120. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/advanced_options.h +12 -3
  121. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/attribute_groups.h +114 -0
  122. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/c.h +90 -0
  123. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/cache.h +5 -0
  124. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/comparator.h +27 -0
  125. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/db.h +71 -12
  126. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/env.h +9 -0
  127. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/experimental.h +5 -0
  128. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/file_system.h +14 -0
  129. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator.h +9 -71
  130. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator_base.h +90 -0
  131. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/listener.h +21 -0
  132. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/options.h +125 -12
  133. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/perf_context.h +1 -1
  134. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/sst_file_reader.h +11 -1
  135. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table.h +6 -6
  136. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table_properties.h +19 -0
  137. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/transaction_log.h +12 -6
  138. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/types.h +12 -0
  139. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/universal_compaction.h +31 -0
  140. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/user_write_callback.h +29 -0
  141. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/cache_dump_load.h +4 -0
  142. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/checkpoint.h +4 -2
  143. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/customizable_util.h +0 -1
  144. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/env_mirror.h +1 -1
  145. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -7
  146. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/option_change_migration.h +4 -4
  147. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/stackable_db.h +24 -5
  148. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +46 -0
  149. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction.h +42 -17
  150. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction_db.h +5 -0
  151. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/types_util.h +36 -0
  152. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +71 -3
  153. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/version.h +2 -2
  154. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/wide_columns.h +87 -72
  155. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/write_batch_base.h +1 -1
  156. package/vendor/librocksdb/vendor/rocksdb/memory/memory_allocator.cc +1 -0
  157. package/vendor/librocksdb/vendor/rocksdb/options/cf_options.cc +13 -2
  158. package/vendor/librocksdb/vendor/rocksdb/options/cf_options.h +6 -2
  159. package/vendor/librocksdb/vendor/rocksdb/options/db_options.cc +27 -1
  160. package/vendor/librocksdb/vendor/rocksdb/options/db_options.h +10 -3
  161. package/vendor/librocksdb/vendor/rocksdb/options/options.cc +3 -0
  162. package/vendor/librocksdb/vendor/rocksdb/options/options_helper.cc +1 -0
  163. package/vendor/librocksdb/vendor/rocksdb/port/jemalloc_helper.h +2 -2
  164. package/vendor/librocksdb/vendor/rocksdb/port/stack_trace.cc +1 -0
  165. package/vendor/librocksdb/vendor/rocksdb/port/win/port_win.cc +3 -2
  166. package/vendor/librocksdb/vendor/rocksdb/table/block_based/binary_search_index_reader.cc +1 -2
  167. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_builder.cc +47 -31
  168. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_factory.cc +15 -0
  169. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.cc +37 -18
  170. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.h +10 -3
  171. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.cc +102 -41
  172. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.h +15 -7
  173. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -3
  174. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -6
  175. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_cache.h +31 -0
  176. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_prefetcher.cc +6 -0
  177. package/vendor/librocksdb/vendor/rocksdb/table/block_based/cachable_entry.h +10 -5
  178. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block.h +34 -28
  179. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.cc +17 -11
  180. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.h +5 -2
  181. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_policy.cc +12 -3
  182. package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.cc +37 -30
  183. package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.h +11 -13
  184. package/vendor/librocksdb/vendor/rocksdb/table/block_based/hash_index_reader.cc +1 -2
  185. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.cc +62 -53
  186. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.h +60 -38
  187. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.cc +14 -9
  188. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.h +4 -1
  189. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.cc +135 -94
  190. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.h +52 -46
  191. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.cc +51 -13
  192. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.h +2 -0
  193. package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.cc +3 -11
  194. package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.h +2 -3
  195. package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.cc +8 -10
  196. package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.h +2 -1
  197. package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.cc +9 -10
  198. package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.h +3 -2
  199. package/vendor/librocksdb/vendor/rocksdb/table/format.cc +1 -2
  200. package/vendor/librocksdb/vendor/rocksdb/table/iterator.cc +4 -0
  201. package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.cc +18 -13
  202. package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.h +5 -3
  203. package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.cc +18 -4
  204. package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.h +4 -0
  205. package/vendor/librocksdb/vendor/rocksdb/table/plain/plain_table_builder.cc +2 -2
  206. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_dumper.cc +6 -6
  207. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_reader.cc +24 -2
  208. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_writer_collectors.h +3 -1
  209. package/vendor/librocksdb/vendor/rocksdb/table/table_builder.h +8 -7
  210. package/vendor/librocksdb/vendor/rocksdb/table/table_iterator.h +69 -0
  211. package/vendor/librocksdb/vendor/rocksdb/table/table_reader.h +9 -0
  212. package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.cc +25 -0
  213. package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.h +12 -0
  214. package/vendor/librocksdb/vendor/rocksdb/tools/db_bench_tool.cc +32 -0
  215. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd.cc +618 -124
  216. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd_impl.h +19 -1
  217. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_tool.cc +9 -0
  218. package/vendor/librocksdb/vendor/rocksdb/util/aligned_storage.h +24 -0
  219. package/vendor/librocksdb/vendor/rocksdb/util/autovector.h +4 -0
  220. package/vendor/librocksdb/vendor/rocksdb/util/comparator.cc +12 -0
  221. package/vendor/librocksdb/vendor/rocksdb/util/filter_bench.cc +1 -1
  222. package/vendor/librocksdb/vendor/rocksdb/util/random.cc +2 -1
  223. package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.cc +3 -4
  224. package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.h +1 -1
  225. package/vendor/librocksdb/vendor/rocksdb/util/udt_util.cc +33 -0
  226. package/vendor/librocksdb/vendor/rocksdb/util/udt_util.h +7 -0
  227. package/vendor/librocksdb/vendor/rocksdb/util/write_batch_util.h +5 -0
  228. package/vendor/librocksdb/vendor/rocksdb/util/xxhash.h +36 -29
  229. package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl.h +3 -0
  230. package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +20 -0
  231. package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.cc +29 -9
  232. package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.h +14 -3
  233. package/vendor/librocksdb/vendor/rocksdb/utilities/debug.cc +16 -4
  234. package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.cc +677 -248
  235. package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.h +325 -158
  236. package/vendor/librocksdb/vendor/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -8
  237. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +144 -0
  238. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +45 -0
  239. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +12 -0
  240. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +1 -1
  241. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h +3 -3
  242. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.cc +116 -20
  243. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.h +33 -1
  244. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +78 -13
  245. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.h +33 -1
  246. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.cc +106 -7
  247. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.h +68 -10
  248. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_test.h +7 -3
  249. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.cc +8 -5
  250. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.h +7 -4
  251. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -12
  252. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.cc +4 -4
  253. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.h +17 -0
  254. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn.cc +11 -9
  255. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +2 -1
  256. package/vendor/librocksdb/vendor/rocksdb/utilities/types_util.cc +88 -0
  257. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +313 -14
  258. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +7 -0
  259. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +1 -1
  260. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.cc +0 -102
  261. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.h +0 -159
@@ -619,6 +619,21 @@ Status BlockBasedTableFactory::ValidateOptions(
619
619
  "Enable block_align, but compression "
620
620
  "enabled");
621
621
  }
622
+ if (table_options_.block_align &&
623
+ cf_opts.bottommost_compression != kDisableCompressionOption &&
624
+ cf_opts.bottommost_compression != kNoCompression) {
625
+ return Status::InvalidArgument(
626
+ "Enable block_align, but bottommost_compression enabled");
627
+ }
628
+ if (table_options_.block_align) {
629
+ for (auto level_compression : cf_opts.compression_per_level) {
630
+ if (level_compression != kDisableCompressionOption &&
631
+ level_compression != kNoCompression) {
632
+ return Status::InvalidArgument(
633
+ "Enable block_align, but compression_per_level enabled");
634
+ }
635
+ }
636
+ }
622
637
  if (table_options_.block_align &&
623
638
  (table_options_.block_size & (table_options_.block_size - 1))) {
624
639
  return Status::InvalidArgument(
@@ -338,8 +338,8 @@ void BlockBasedTableIterator::InitDataBlock() {
338
338
  bool use_block_cache_for_lookup = true;
339
339
 
340
340
  if (DoesContainBlockHandles()) {
341
- data_block_handle = block_handles_.front().handle_;
342
- is_in_cache = block_handles_.front().is_cache_hit_;
341
+ data_block_handle = block_handles_->front().handle_;
342
+ is_in_cache = block_handles_->front().is_cache_hit_;
343
343
  use_block_cache_for_lookup = false;
344
344
  } else {
345
345
  data_block_handle = index_iter_->value().handle;
@@ -361,7 +361,7 @@ void BlockBasedTableIterator::InitDataBlock() {
361
361
  Status s;
362
362
  block_iter_.Invalidate(Status::OK());
363
363
  table_->NewDataBlockIterator<DataBlockIter>(
364
- read_options_, (block_handles_.front().cachable_entry_).As<Block>(),
364
+ read_options_, (block_handles_->front().cachable_entry_).As<Block>(),
365
365
  &block_iter_, s);
366
366
  } else {
367
367
  auto* rep = table_->get_rep();
@@ -466,8 +466,8 @@ void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) {
466
466
  bool is_in_cache = false;
467
467
 
468
468
  if (DoesContainBlockHandles()) {
469
- data_block_handle = block_handles_.front().handle_;
470
- is_in_cache = block_handles_.front().is_cache_hit_;
469
+ data_block_handle = block_handles_->front().handle_;
470
+ is_in_cache = block_handles_->front().is_cache_hit_;
471
471
  } else {
472
472
  data_block_handle = index_iter_->value().handle;
473
473
  }
@@ -477,7 +477,7 @@ void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) {
477
477
  if (is_in_cache) {
478
478
  block_iter_.Invalidate(Status::OK());
479
479
  table_->NewDataBlockIterator<DataBlockIter>(
480
- read_options_, (block_handles_.front().cachable_entry_).As<Block>(),
480
+ read_options_, (block_handles_->front().cachable_entry_).As<Block>(),
481
481
  &block_iter_, s);
482
482
  } else {
483
483
  table_->NewDataBlockIterator<DataBlockIter>(
@@ -524,7 +524,7 @@ bool BlockBasedTableIterator::MaterializeCurrentBlock() {
524
524
  // BlockCacheLookupForReadAheadSize is called.
525
525
  Slice first_internal_key;
526
526
  if (DoesContainBlockHandles()) {
527
- first_internal_key = block_handles_.front().first_internal_key_;
527
+ first_internal_key = block_handles_->front().first_internal_key_;
528
528
  } else {
529
529
  first_internal_key = index_iter_->value().first_internal_key;
530
530
  }
@@ -580,7 +580,7 @@ void BlockBasedTableIterator::FindBlockForward() {
580
580
  if (DoesContainBlockHandles()) {
581
581
  // Advance and point to that next Block handle to make that block handle
582
582
  // current.
583
- block_handles_.pop_front();
583
+ block_handles_->pop_front();
584
584
  }
585
585
 
586
586
  if (!DoesContainBlockHandles()) {
@@ -681,7 +681,8 @@ void BlockBasedTableIterator::InitializeStartAndEndOffsets(
681
681
  bool read_curr_block, bool& found_first_miss_block,
682
682
  uint64_t& start_updated_offset, uint64_t& end_updated_offset,
683
683
  size_t& prev_handles_size) {
684
- prev_handles_size = block_handles_.size();
684
+ assert(block_handles_ != nullptr);
685
+ prev_handles_size = block_handles_->size();
685
686
  size_t footer = table_->get_rep()->footer.GetBlockTrailerSize();
686
687
 
687
688
  // It initialize start and end offset to begin which is covered by following
@@ -701,7 +702,7 @@ void BlockBasedTableIterator::InitializeStartAndEndOffsets(
701
702
 
702
703
  end_updated_offset = block_handle_info.handle_.offset() + footer +
703
704
  block_handle_info.handle_.size();
704
- block_handles_.emplace_back(std::move(block_handle_info));
705
+ block_handles_->emplace_back(std::move(block_handle_info));
705
706
 
706
707
  index_iter_->Next();
707
708
  is_index_at_curr_block_ = false;
@@ -717,17 +718,17 @@ void BlockBasedTableIterator::InitializeStartAndEndOffsets(
717
718
  // Initialize prev_handles_size to 0 as all those handles need to be read
718
719
  // again.
719
720
  prev_handles_size = 0;
720
- start_updated_offset = block_handles_.front().handle_.offset();
721
- end_updated_offset = block_handles_.back().handle_.offset() + footer +
722
- block_handles_.back().handle_.size();
721
+ start_updated_offset = block_handles_->front().handle_.offset();
722
+ end_updated_offset = block_handles_->back().handle_.offset() + footer +
723
+ block_handles_->back().handle_.size();
723
724
  }
724
725
  } else {
725
726
  // Scenario 3 : read_curr_block is false (callback made to do additional
726
727
  // prefetching in buffers) and the queue already has some
727
728
  // handles from first buffer.
728
729
  if (DoesContainBlockHandles()) {
729
- start_updated_offset = block_handles_.back().handle_.offset() + footer +
730
- block_handles_.back().handle_.size();
730
+ start_updated_offset = block_handles_->back().handle_.offset() + footer +
731
+ block_handles_->back().handle_.size();
731
732
  end_updated_offset = start_updated_offset;
732
733
  } else {
733
734
  // Scenario 4 : read_curr_block is false (callback made to do additional
@@ -789,6 +790,9 @@ void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize(
789
790
 
790
791
  // Initialize start and end offsets based on exisiting handles in the queue
791
792
  // and read_curr_block argument passed.
793
+ if (block_handles_ == nullptr) {
794
+ block_handles_.reset(new std::deque<BlockHandleInfo>());
795
+ }
792
796
  InitializeStartAndEndOffsets(read_curr_block, found_first_miss_block,
793
797
  start_updated_offset, end_updated_offset,
794
798
  prev_handles_size);
@@ -819,6 +823,12 @@ void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize(
819
823
  read_options_, block_handle,
820
824
  &(block_handle_info.cachable_entry_).As<Block_kData>());
821
825
  if (!s.ok()) {
826
+ #ifndef NDEBUG
827
+ // To allow fault injection verification to pass since non-okay status in
828
+ // `BlockCacheLookupForReadAheadSize()` won't fail the read but to have
829
+ // less or no readahead
830
+ IGNORE_STATUS_IF_ERROR(s);
831
+ #endif
822
832
  break;
823
833
  }
824
834
 
@@ -833,7 +843,7 @@ void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize(
833
843
  }
834
844
 
835
845
  // Add the handle to the queue.
836
- block_handles_.emplace_back(std::move(block_handle_info));
846
+ block_handles_->emplace_back(std::move(block_handle_info));
837
847
 
838
848
  // Can't figure out for current block if current block
839
849
  // is out of bound. But for next block we can find that.
@@ -848,12 +858,21 @@ void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize(
848
858
  is_index_at_curr_block_ = false;
849
859
  }
850
860
 
861
+ #ifndef NDEBUG
862
+ // To allow fault injection verification to pass since non-okay status in
863
+ // `BlockCacheLookupForReadAheadSize()` won't fail the read but to have less
864
+ // or no readahead
865
+ if (!index_iter_->status().ok()) {
866
+ IGNORE_STATUS_IF_ERROR(index_iter_->status());
867
+ }
868
+ #endif
869
+
851
870
  if (found_first_miss_block) {
852
871
  // Iterate cache hit block handles from the end till a Miss is there, to
853
872
  // truncate and update the end offset till that Miss.
854
- auto it = block_handles_.rbegin();
873
+ auto it = block_handles_->rbegin();
855
874
  auto it_end =
856
- block_handles_.rbegin() + (block_handles_.size() - prev_handles_size);
875
+ block_handles_->rbegin() + (block_handles_->size() - prev_handles_size);
857
876
 
858
877
  while (it != it_end && (*it).is_cache_hit_ &&
859
878
  start_updated_offset != (*it).handle_.offset()) {
@@ -338,7 +338,8 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
338
338
  // different blocks when readahead_size is calculated in
339
339
  // BlockCacheLookupForReadAheadSize, to avoid index_iter_ reseek,
340
340
  // block_handles_ is used.
341
- std::deque<BlockHandleInfo> block_handles_;
341
+ // `block_handles_` is lazily constructed to save CPU when it is unused
342
+ std::unique_ptr<std::deque<BlockHandleInfo>> block_handles_;
342
343
 
343
344
  // During cache lookup to find readahead size, index_iter_ is iterated and it
344
345
  // can point to a different block. is_index_at_curr_block_ keeps track of
@@ -418,7 +419,11 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
418
419
  : false);
419
420
  }
420
421
 
421
- void ClearBlockHandles() { block_handles_.clear(); }
422
+ void ClearBlockHandles() {
423
+ if (block_handles_ != nullptr) {
424
+ block_handles_->clear();
425
+ }
426
+ }
422
427
 
423
428
  // Reset prev_block_offset_. If index_iter_ has moved ahead, it won't get
424
429
  // accurate prev_block_offset_.
@@ -426,7 +431,9 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
426
431
  prev_block_offset_ = std::numeric_limits<uint64_t>::max();
427
432
  }
428
433
 
429
- bool DoesContainBlockHandles() { return !block_handles_.empty(); }
434
+ bool DoesContainBlockHandles() {
435
+ return block_handles_ != nullptr && !block_handles_->empty();
436
+ }
430
437
 
431
438
  void InitializeStartAndEndOffsets(bool read_curr_block,
432
439
  bool& found_first_miss_block,
@@ -135,7 +135,46 @@ extern const uint64_t kBlockBasedTableMagicNumber;
135
135
  extern const std::string kHashIndexPrefixesBlock;
136
136
  extern const std::string kHashIndexPrefixesMetadataBlock;
137
137
 
138
- BlockBasedTable::~BlockBasedTable() { delete rep_; }
138
+ BlockBasedTable::~BlockBasedTable() {
139
+ auto ua = rep_->uncache_aggressiveness.LoadRelaxed();
140
+ if (ua > 0 && rep_->table_options.block_cache) {
141
+ if (rep_->filter) {
142
+ rep_->filter->EraseFromCacheBeforeDestruction(ua);
143
+ }
144
+ if (rep_->index_reader) {
145
+ {
146
+ // TODO: Also uncache data blocks known after any gaps in partitioned
147
+ // index. Right now the iterator errors out as soon as there's an
148
+ // index partition not in cache.
149
+ IndexBlockIter iiter_on_stack;
150
+ ReadOptions ropts;
151
+ ropts.read_tier = kBlockCacheTier; // No I/O
152
+ auto iiter = NewIndexIterator(
153
+ ropts, /*disable_prefix_seek=*/false, &iiter_on_stack,
154
+ /*get_context=*/nullptr, /*lookup_context=*/nullptr);
155
+ std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
156
+ if (iiter != &iiter_on_stack) {
157
+ iiter_unique_ptr.reset(iiter);
158
+ }
159
+ // Un-cache the data blocks the index iterator will tell us about
160
+ // without I/O. (NOTE: It's extremely unlikely that a data block
161
+ // will be in block cache without the index block pointing to it
162
+ // also in block cache.)
163
+ UncacheAggressivenessAdvisor advisor(ua);
164
+ for (iiter->SeekToFirst(); iiter->Valid() && advisor.ShouldContinue();
165
+ iiter->Next()) {
166
+ bool erased = EraseFromCache(iiter->value().handle);
167
+ advisor.Report(erased);
168
+ }
169
+ iiter->status().PermitUncheckedError();
170
+ }
171
+
172
+ // Un-cache the index block(s)
173
+ rep_->index_reader->EraseFromCacheBeforeDestruction(ua);
174
+ }
175
+ }
176
+ delete rep_;
177
+ }
139
178
 
140
179
  namespace {
141
180
  // Read the block identified by "handle" from "file".
@@ -439,6 +478,7 @@ bool IsFeatureSupported(const TableProperties& table_properties,
439
478
  }
440
479
 
441
480
  // Caller has to ensure seqno is not nullptr.
481
+ // Set *seqno to the global sequence number for reading this file.
442
482
  Status GetGlobalSequenceNumber(const TableProperties& table_properties,
443
483
  SequenceNumber largest_seqno,
444
484
  SequenceNumber* seqno) {
@@ -461,12 +501,17 @@ Status GetGlobalSequenceNumber(const TableProperties& table_properties,
461
501
  }
462
502
 
463
503
  uint32_t version = DecodeFixed32(version_pos->second.c_str());
464
- if (version < 2) {
465
- if (seqno_pos != props.end() || version != 1) {
466
- std::array<char, 200> msg_buf;
504
+ if (version != 2) {
505
+ std::array<char, 200> msg_buf;
506
+ if (version != 1) {
507
+ snprintf(msg_buf.data(), msg_buf.max_size(),
508
+ "An external sst file has corrupted version %u.", version);
509
+ return Status::Corruption(msg_buf.data());
510
+ }
511
+ if (seqno_pos != props.end()) {
467
512
  // This is a v1 external sst file, global_seqno is not supported.
468
513
  snprintf(msg_buf.data(), msg_buf.max_size(),
469
- "An external sst file with version %u have global seqno "
514
+ "An external sst file with version %u has global seqno "
470
515
  "property with value %s",
471
516
  version, seqno_pos->second.c_str());
472
517
  return Status::Corruption(msg_buf.data());
@@ -594,6 +639,8 @@ Status BlockBasedTable::Open(
594
639
 
595
640
  // From read_options, retain deadline, io_timeout, rate_limiter_priority, and
596
641
  // verify_checksums. In future, we may retain more options.
642
+ // TODO: audit more ReadOptions and do this in a way that brings attention
643
+ // on new ReadOptions?
597
644
  ReadOptions ro;
598
645
  ro.deadline = read_options.deadline;
599
646
  ro.io_timeout = read_options.io_timeout;
@@ -844,6 +891,10 @@ Status BlockBasedTable::PrefetchTail(
844
891
  if (tail_size != 0) {
845
892
  tail_prefetch_size = tail_size;
846
893
  } else {
894
+ // Fallback for SST files, for which tail size is not recorded in the
895
+ // manifest. Eventually, this fallback might be removed, so it's
896
+ // better to make sure that such SST files get compacted.
897
+ // See https://github.com/facebook/rocksdb/issues/12664
847
898
  if (tail_prefetch_stats != nullptr) {
848
899
  // Multiple threads may get a 0 (no history) when running in parallel,
849
900
  // but it will get cleared after the first of them finishes.
@@ -858,14 +909,15 @@ Status BlockBasedTable::PrefetchTail(
858
909
  // properties, at which point we don't yet know the index type.
859
910
  tail_prefetch_size = prefetch_all || preload_all ? 512 * 1024 : 4 * 1024;
860
911
 
861
- ROCKS_LOG_WARN(logger,
862
- "Tail prefetch size %zu is calculated based on heuristics",
863
- tail_prefetch_size);
864
- } else {
865
912
  ROCKS_LOG_WARN(
866
913
  logger,
867
- "Tail prefetch size %zu is calculated based on TailPrefetchStats",
868
- tail_prefetch_size);
914
+ "[%s] Tail prefetch size %zu is calculated based on heuristics.",
915
+ file->file_name().c_str(), tail_prefetch_size);
916
+ } else {
917
+ ROCKS_LOG_WARN(logger,
918
+ "[%s] Tail prefetch size %zu is calculated based on "
919
+ "TailPrefetchStats.",
920
+ file->file_name().c_str(), tail_prefetch_size);
869
921
  }
870
922
  }
871
923
  size_t prefetch_off;
@@ -1521,9 +1573,8 @@ Status BlockBasedTable::LookupAndPinBlocksInCache(
1521
1573
  Status s;
1522
1574
  CachableEntry<UncompressionDict> uncompression_dict;
1523
1575
  if (rep_->uncompression_dict_reader) {
1524
- const bool no_io = (ro.read_tier == kBlockCacheTier);
1525
1576
  s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
1526
- /* prefetch_buffer= */ nullptr, ro, no_io, ro.verify_checksums,
1577
+ /* prefetch_buffer= */ nullptr, ro,
1527
1578
  /* get_context= */ nullptr, /* lookup_context= */ nullptr,
1528
1579
  &uncompression_dict);
1529
1580
  if (!s.ok()) {
@@ -1978,14 +2029,11 @@ bool BlockBasedTable::PrefixRangeMayMatch(
1978
2029
  FilterBlockReader* const filter = rep_->filter.get();
1979
2030
  *filter_checked = false;
1980
2031
  if (filter != nullptr) {
1981
- const bool no_io = read_options.read_tier == kBlockCacheTier;
1982
-
1983
2032
  const Slice* const const_ikey_ptr = &internal_key;
1984
2033
  may_match = filter->RangeMayExist(
1985
2034
  read_options.iterate_upper_bound, user_key_without_ts, prefix_extractor,
1986
2035
  rep_->internal_comparator.user_comparator(), const_ikey_ptr,
1987
- filter_checked, need_upper_bound_check, no_io, lookup_context,
1988
- read_options);
2036
+ filter_checked, need_upper_bound_check, lookup_context, read_options);
1989
2037
  }
1990
2038
 
1991
2039
  return may_match;
@@ -2065,7 +2113,7 @@ FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator(
2065
2113
  }
2066
2114
 
2067
2115
  bool BlockBasedTable::FullFilterKeyMayMatch(
2068
- FilterBlockReader* filter, const Slice& internal_key, const bool no_io,
2116
+ FilterBlockReader* filter, const Slice& internal_key,
2069
2117
  const SliceTransform* prefix_extractor, GetContext* get_context,
2070
2118
  BlockCacheLookupContext* lookup_context,
2071
2119
  const ReadOptions& read_options) const {
@@ -2078,7 +2126,7 @@ bool BlockBasedTable::FullFilterKeyMayMatch(
2078
2126
  size_t ts_sz = rep_->internal_comparator.user_comparator()->timestamp_size();
2079
2127
  Slice user_key_without_ts = StripTimestampFromUserKey(user_key, ts_sz);
2080
2128
  if (rep_->whole_key_filtering) {
2081
- may_match = filter->KeyMayMatch(user_key_without_ts, no_io, const_ikey_ptr,
2129
+ may_match = filter->KeyMayMatch(user_key_without_ts, const_ikey_ptr,
2082
2130
  get_context, lookup_context, read_options);
2083
2131
  if (may_match) {
2084
2132
  RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_POSITIVE);
@@ -2092,7 +2140,7 @@ bool BlockBasedTable::FullFilterKeyMayMatch(
2092
2140
  // FIXME ^^^: there should be no reason for Get() to depend on current
2093
2141
  // prefix_extractor at all. It should always use table_prefix_extractor.
2094
2142
  may_match = filter->PrefixMayMatch(
2095
- prefix_extractor->Transform(user_key_without_ts), no_io, const_ikey_ptr,
2143
+ prefix_extractor->Transform(user_key_without_ts), const_ikey_ptr,
2096
2144
  get_context, lookup_context, read_options);
2097
2145
  RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_CHECKED);
2098
2146
  if (may_match) {
@@ -2108,7 +2156,7 @@ bool BlockBasedTable::FullFilterKeyMayMatch(
2108
2156
  }
2109
2157
 
2110
2158
  void BlockBasedTable::FullFilterKeysMayMatch(
2111
- FilterBlockReader* filter, MultiGetRange* range, const bool no_io,
2159
+ FilterBlockReader* filter, MultiGetRange* range,
2112
2160
  const SliceTransform* prefix_extractor,
2113
2161
  BlockCacheLookupContext* lookup_context,
2114
2162
  const ReadOptions& read_options) const {
@@ -2118,7 +2166,7 @@ void BlockBasedTable::FullFilterKeysMayMatch(
2118
2166
  uint64_t before_keys = range->KeysLeft();
2119
2167
  assert(before_keys > 0); // Caller should ensure
2120
2168
  if (rep_->whole_key_filtering) {
2121
- filter->KeysMayMatch(range, no_io, lookup_context, read_options);
2169
+ filter->KeysMayMatch(range, lookup_context, read_options);
2122
2170
  uint64_t after_keys = range->KeysLeft();
2123
2171
  if (after_keys) {
2124
2172
  RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_POSITIVE, after_keys);
@@ -2134,7 +2182,7 @@ void BlockBasedTable::FullFilterKeysMayMatch(
2134
2182
  } else if (!PrefixExtractorChanged(prefix_extractor)) {
2135
2183
  // FIXME ^^^: there should be no reason for MultiGet() to depend on current
2136
2184
  // prefix_extractor at all. It should always use table_prefix_extractor.
2137
- filter->PrefixesMayMatch(range, prefix_extractor, false, lookup_context,
2185
+ filter->PrefixesMayMatch(range, prefix_extractor, lookup_context,
2138
2186
  read_options);
2139
2187
  RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_CHECKED, before_keys);
2140
2188
  uint64_t after_keys = range->KeysLeft();
@@ -2240,7 +2288,6 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
2240
2288
  assert(key.size() >= 8); // key must be internal key
2241
2289
  assert(get_context != nullptr);
2242
2290
  Status s;
2243
- const bool no_io = read_options.read_tier == kBlockCacheTier;
2244
2291
 
2245
2292
  FilterBlockReader* const filter =
2246
2293
  !skip_filters ? rep_->filter.get() : nullptr;
@@ -2259,7 +2306,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
2259
2306
  }
2260
2307
  TEST_SYNC_POINT("BlockBasedTable::Get:BeforeFilterMatch");
2261
2308
  const bool may_match =
2262
- FullFilterKeyMayMatch(filter, key, no_io, prefix_extractor, get_context,
2309
+ FullFilterKeyMayMatch(filter, key, prefix_extractor, get_context,
2263
2310
  &lookup_context, read_options);
2264
2311
  TEST_SYNC_POINT("BlockBasedTable::Get:AfterFilterMatch");
2265
2312
  if (may_match) {
@@ -2309,7 +2356,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
2309
2356
  /*for_compaction=*/false, /*async_read=*/false, tmp_status,
2310
2357
  /*use_block_cache_for_lookup=*/true);
2311
2358
 
2312
- if (no_io && biter.status().IsIncomplete()) {
2359
+ if (read_options.read_tier == kBlockCacheTier &&
2360
+ biter.status().IsIncomplete()) {
2313
2361
  // couldn't get block from block_cache
2314
2362
  // Update Saver.state to Found because we are only looking for
2315
2363
  // whether we can guarantee the key is not there when "no_io" is set
@@ -2421,7 +2469,6 @@ Status BlockBasedTable::MultiGetFilter(const ReadOptions& read_options,
2421
2469
 
2422
2470
  // First check the full filter
2423
2471
  // If full filter not useful, Then go into each block
2424
- const bool no_io = read_options.read_tier == kBlockCacheTier;
2425
2472
  uint64_t tracing_mget_id = BlockCacheTraceHelper::kReservedGetId;
2426
2473
  if (mget_range->begin()->get_context) {
2427
2474
  tracing_mget_id = mget_range->begin()->get_context->get_tracing_get_id();
@@ -2429,8 +2476,8 @@ Status BlockBasedTable::MultiGetFilter(const ReadOptions& read_options,
2429
2476
  BlockCacheLookupContext lookup_context{
2430
2477
  TableReaderCaller::kUserMultiGet, tracing_mget_id,
2431
2478
  /*_get_from_user_specified_snapshot=*/read_options.snapshot != nullptr};
2432
- FullFilterKeysMayMatch(filter, mget_range, no_io, prefix_extractor,
2433
- &lookup_context, read_options);
2479
+ FullFilterKeysMayMatch(filter, mget_range, prefix_extractor, &lookup_context,
2480
+ read_options);
2434
2481
 
2435
2482
  return Status::OK();
2436
2483
  }
@@ -2663,6 +2710,24 @@ Status BlockBasedTable::VerifyChecksumInMetaBlocks(
2663
2710
  return s;
2664
2711
  }
2665
2712
 
2713
+ bool BlockBasedTable::EraseFromCache(const BlockHandle& handle) const {
2714
+ assert(rep_ != nullptr);
2715
+
2716
+ Cache* const cache = rep_->table_options.block_cache.get();
2717
+ if (cache == nullptr) {
2718
+ return false;
2719
+ }
2720
+
2721
+ CacheKey key = GetCacheKey(rep_->base_cache_key, handle);
2722
+
2723
+ Cache::Handle* const cache_handle = cache->Lookup(key.AsSlice());
2724
+ if (cache_handle == nullptr) {
2725
+ return false;
2726
+ }
2727
+
2728
+ return cache->Release(cache_handle, /*erase_if_last_ref=*/true);
2729
+ }
2730
+
2666
2731
  bool BlockBasedTable::TEST_BlockInCache(const BlockHandle& handle) const {
2667
2732
  assert(rep_ != nullptr);
2668
2733
 
@@ -2796,11 +2861,8 @@ uint64_t BlockBasedTable::ApproximateOffsetOf(const ReadOptions& read_options,
2796
2861
 
2797
2862
  BlockCacheLookupContext context(caller);
2798
2863
  IndexBlockIter iiter_on_stack;
2799
- ReadOptions ro;
2800
- ro.total_order_seek = true;
2801
- ro.io_activity = read_options.io_activity;
2802
2864
  auto index_iter =
2803
- NewIndexIterator(ro, /*disable_prefix_seek=*/true,
2865
+ NewIndexIterator(read_options, /*disable_prefix_seek=*/true,
2804
2866
  /*input_iter=*/&iiter_on_stack, /*get_context=*/nullptr,
2805
2867
  /*lookup_context=*/&context);
2806
2868
  std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
@@ -2843,11 +2905,8 @@ uint64_t BlockBasedTable::ApproximateSize(const ReadOptions& read_options,
2843
2905
 
2844
2906
  BlockCacheLookupContext context(caller);
2845
2907
  IndexBlockIter iiter_on_stack;
2846
- ReadOptions ro;
2847
- ro.total_order_seek = true;
2848
- ro.io_activity = read_options.io_activity;
2849
2908
  auto index_iter =
2850
- NewIndexIterator(ro, /*disable_prefix_seek=*/true,
2909
+ NewIndexIterator(read_options, /*disable_prefix_seek=*/true,
2851
2910
  /*input_iter=*/&iiter_on_stack, /*get_context=*/nullptr,
2852
2911
  /*lookup_context=*/&context);
2853
2912
  std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
@@ -3023,10 +3082,8 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
3023
3082
  if (rep_->uncompression_dict_reader) {
3024
3083
  CachableEntry<UncompressionDict> uncompression_dict;
3025
3084
  s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
3026
- nullptr /* prefetch_buffer */, ro, false /* no_io */,
3027
- false, /* verify_checksums */
3028
- nullptr /* get_context */, nullptr /* lookup_context */,
3029
- &uncompression_dict);
3085
+ nullptr /* prefetch_buffer */, ro, nullptr /* get_context */,
3086
+ nullptr /* lookup_context */, &uncompression_dict);
3030
3087
  if (!s.ok()) {
3031
3088
  return s;
3032
3089
  }
@@ -3232,4 +3289,8 @@ void BlockBasedTable::DumpKeyValue(const Slice& key, const Slice& value,
3232
3289
  out_stream << " ------\n";
3233
3290
  }
3234
3291
 
3292
+ void BlockBasedTable::MarkObsolete(uint32_t uncache_aggressiveness) {
3293
+ rep_->uncache_aggressiveness.StoreRelaxed(uncache_aggressiveness);
3294
+ }
3295
+
3235
3296
  } // namespace ROCKSDB_NAMESPACE
@@ -33,6 +33,7 @@
33
33
  #include "table/table_reader.h"
34
34
  #include "table/two_level_iterator.h"
35
35
  #include "trace_replay/block_cache_tracer.h"
36
+ #include "util/atomic.h"
36
37
  #include "util/coro_utils.h"
37
38
  #include "util/hash_containers.h"
38
39
 
@@ -183,6 +184,8 @@ class BlockBasedTable : public TableReader {
183
184
  Status ApproximateKeyAnchors(const ReadOptions& read_options,
184
185
  std::vector<Anchor>& anchors) override;
185
186
 
187
+ bool EraseFromCache(const BlockHandle& handle) const;
188
+
186
189
  bool TEST_BlockInCache(const BlockHandle& handle) const;
187
190
 
188
191
  // Returns true if the block for the specified key is in cache.
@@ -208,6 +211,8 @@ class BlockBasedTable : public TableReader {
208
211
  Status VerifyChecksum(const ReadOptions& readOptions,
209
212
  TableReaderCaller caller) override;
210
213
 
214
+ void MarkObsolete(uint32_t uncache_aggressiveness) override;
215
+
211
216
  ~BlockBasedTable();
212
217
 
213
218
  bool TEST_FilterBlockInCache() const;
@@ -241,6 +246,8 @@ class BlockBasedTable : public TableReader {
241
246
  FilePrefetchBuffer* /* tail_prefetch_buffer */) {
242
247
  return Status::OK();
243
248
  }
249
+ virtual void EraseFromCacheBeforeDestruction(
250
+ uint32_t /*uncache_aggressiveness*/) {}
244
251
  };
245
252
 
246
253
  class IndexReaderCommon;
@@ -462,14 +469,12 @@ class BlockBasedTable : public TableReader {
462
469
  std::unique_ptr<IndexReader>* index_reader);
463
470
 
464
471
  bool FullFilterKeyMayMatch(FilterBlockReader* filter, const Slice& user_key,
465
- const bool no_io,
466
472
  const SliceTransform* prefix_extractor,
467
473
  GetContext* get_context,
468
474
  BlockCacheLookupContext* lookup_context,
469
475
  const ReadOptions& read_options) const;
470
476
 
471
477
  void FullFilterKeysMayMatch(FilterBlockReader* filter, MultiGetRange* range,
472
- const bool no_io,
473
478
  const SliceTransform* prefix_extractor,
474
479
  BlockCacheLookupContext* lookup_context,
475
480
  const ReadOptions& read_options) const;
@@ -619,11 +624,7 @@ struct BlockBasedTable::Rep {
619
624
 
620
625
  std::shared_ptr<FragmentedRangeTombstoneList> fragmented_range_dels;
621
626
 
622
- // FIXME
623
- // If true, data blocks in this file are definitely ZSTD compressed. If false
624
- // they might not be. When false we skip creating a ZSTD digested
625
- // uncompression dictionary. Even if we get a false negative, things should
626
- // still work, just not as quickly.
627
+ // Context for block cache CreateCallback
627
628
  BlockCreateContext create_context;
628
629
 
629
630
  // If global_seqno is used, all Keys in this file will have the same
@@ -672,6 +673,13 @@ struct BlockBasedTable::Rep {
672
673
  // `end_key` for range deletion entries.
673
674
  const bool user_defined_timestamps_persisted;
674
675
 
676
+ // Set to >0 when the file is known to be obsolete and should have its block
677
+ // cache entries evicted on close. NOTE: when the file becomes obsolete,
678
+ // there could be multiple table cache references that all mark this file as
679
+ // obsolete. An atomic resolves the race quite reasonably. Even in the rare
680
+ // case of such a race, they will most likely be storing the same value.
681
+ RelaxedAtomic<uint32_t> uncache_aggressiveness{0};
682
+
675
683
  std::unique_ptr<CacheReservationManager::CacheReservationHandle>
676
684
  table_reader_cache_res_handle = nullptr;
677
685
 
@@ -62,7 +62,6 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
62
62
  CachableEntry<Block> block;
63
63
  if (rep_->uncompression_dict_reader && block_type == BlockType::kData) {
64
64
  CachableEntry<UncompressionDict> uncompression_dict;
65
- const bool no_io = (ro.read_tier == kBlockCacheTier);
66
65
  // For async scans, don't use the prefetch buffer since an async prefetch
67
66
  // might already be under way and this would invalidate it. Also, the
68
67
  // uncompression dict is typically at the end of the file and would
@@ -72,8 +71,7 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
72
71
  // pattern.
73
72
  s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
74
73
  ((ro.async_io || ro.auto_readahead_size) ? nullptr : prefetch_buffer),
75
- ro, no_io, ro.verify_checksums, get_context, lookup_context,
76
- &uncompression_dict);
74
+ ro, get_context, lookup_context, &uncompression_dict);
77
75
  if (!s.ok()) {
78
76
  iter->Invalidate(s);
79
77
  return iter;
@@ -362,7 +362,6 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
362
362
 
363
363
  // First check the full filter
364
364
  // If full filter not useful, Then go into each block
365
- const bool no_io = read_options.read_tier == kBlockCacheTier;
366
365
  uint64_t tracing_mget_id = BlockCacheTraceHelper::kReservedGetId;
367
366
  if (sst_file_range.begin()->get_context) {
368
367
  tracing_mget_id = sst_file_range.begin()->get_context->get_tracing_get_id();
@@ -372,7 +371,7 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
372
371
  BlockCacheLookupContext metadata_lookup_context{
373
372
  TableReaderCaller::kUserMultiGet, tracing_mget_id,
374
373
  /*_get_from_user_specified_snapshot=*/read_options.snapshot != nullptr};
375
- FullFilterKeysMayMatch(filter, &sst_file_range, no_io, prefix_extractor,
374
+ FullFilterKeysMayMatch(filter, &sst_file_range, prefix_extractor,
376
375
  &metadata_lookup_context, read_options);
377
376
 
378
377
  if (!sst_file_range.empty()) {
@@ -461,9 +460,9 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
461
460
  uncompression_dict_status =
462
461
  rep_->uncompression_dict_reader
463
462
  ->GetOrReadUncompressionDictionary(
464
- nullptr /* prefetch_buffer */, read_options, no_io,
465
- read_options.verify_checksums, get_context,
466
- &metadata_lookup_context, &uncompression_dict);
463
+ nullptr /* prefetch_buffer */, read_options,
464
+ get_context, &metadata_lookup_context,
465
+ &uncompression_dict);
467
466
  uncompression_dict_inited = true;
468
467
  }
469
468
 
@@ -668,7 +667,7 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
668
667
  biter->status().IsIncomplete()) {
669
668
  // couldn't get block from block_cache
670
669
  // Update Saver.state to Found because we are only looking for
671
- // whether we can guarantee the key is not there when "no_io" is set
670
+ // whether we can guarantee the key is not there with kBlockCacheTier
672
671
  get_context->MarkKeyMayExist();
673
672
  break;
674
673
  }