@nxtedition/rocksdb 15.4.1 → 15.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (399)
  1. package/binding.cc +24 -15
  2. package/deps/rocksdb/rocksdb/.clang-tidy +86 -0
  3. package/deps/rocksdb/rocksdb/BUCK +42 -0
  4. package/deps/rocksdb/rocksdb/CMakeLists.txt +11 -0
  5. package/deps/rocksdb/rocksdb/Makefile +59 -32
  6. package/deps/rocksdb/rocksdb/cache/cache.cc +0 -5
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +9 -9
  8. package/deps/rocksdb/rocksdb/cache/cache_key.cc +3 -3
  9. package/deps/rocksdb/rocksdb/cache/cache_key.h +5 -5
  10. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +16 -16
  11. package/deps/rocksdb/rocksdb/cache/cache_test.cc +1 -1
  12. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +258 -294
  13. package/deps/rocksdb/rocksdb/cache/clock_cache.h +98 -49
  14. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -5
  15. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +2 -3
  16. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +18 -18
  17. package/deps/rocksdb/rocksdb/crash_test.mk +5 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -22
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +6 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +14 -16
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +38 -26
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +101 -18
  24. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +12 -0
  25. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +6 -9
  26. package/deps/rocksdb/rocksdb/db/builder.cc +23 -0
  27. package/deps/rocksdb/rocksdb/db/builder.h +7 -0
  28. package/deps/rocksdb/rocksdb/db/c.cc +373 -57
  29. package/deps/rocksdb/rocksdb/db/c_test.c +101 -1
  30. package/deps/rocksdb/rocksdb/db/column_family.cc +31 -3
  31. package/deps/rocksdb/rocksdb/db/column_family_test.cc +10 -13
  32. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +35 -48
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +13 -5
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +201 -39
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -10
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +7 -7
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -455
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +19 -0
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +72 -9
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -10
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +405 -83
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +25 -1
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -10
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1410 -106
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -5
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +2 -1
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +19 -10
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +505 -45
  51. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
  52. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +9 -1
  53. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +4 -4
  54. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +7 -9
  55. package/deps/rocksdb/rocksdb/db/convenience.cc +4 -4
  56. package/deps/rocksdb/rocksdb/db/convenience_impl.h +2 -1
  57. package/deps/rocksdb/rocksdb/db/corruption_test.cc +60 -88
  58. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +10 -12
  59. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +471 -40
  60. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +116 -2
  61. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +5 -15
  62. package/deps/rocksdb/rocksdb/db/db_compaction_abort_test.cc +993 -0
  63. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +329 -29
  64. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +155 -13
  65. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +54 -31
  66. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -0
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +232 -70
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +57 -9
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +224 -31
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +4 -2
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +164 -8
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -0
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +5 -0
  77. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +47 -35
  78. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +22 -9
  79. package/deps/rocksdb/rocksdb/db/db_iter.cc +9 -0
  80. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +371 -6
  81. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -5
  82. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +22 -23
  83. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +0 -2
  84. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +4 -4
  85. package/deps/rocksdb/rocksdb/db/db_options_test.cc +40 -0
  86. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +32 -13
  87. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1 -1
  88. package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +4 -4
  89. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +68 -15
  90. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1 -1
  91. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +2 -3
  92. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -21
  93. package/deps/rocksdb/rocksdb/db/db_test.cc +644 -128
  94. package/deps/rocksdb/rocksdb/db/db_test2.cc +198 -81
  95. package/deps/rocksdb/rocksdb/db/db_test_util.cc +35 -10
  96. package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -2
  97. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +36 -32
  98. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +11 -7
  99. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +499 -0
  100. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +284 -20
  101. package/deps/rocksdb/rocksdb/db/db_write_test.cc +3 -3
  102. package/deps/rocksdb/rocksdb/db/dbformat.h +0 -5
  103. package/deps/rocksdb/rocksdb/db/error_handler.cc +24 -0
  104. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +12 -14
  105. package/deps/rocksdb/rocksdb/db/experimental.cc +13 -10
  106. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -1
  107. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +22 -3
  108. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +21 -15
  109. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +4 -6
  110. package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -3
  111. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +5 -6
  112. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +4 -2
  113. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +17 -17
  114. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -0
  115. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  116. package/deps/rocksdb/rocksdb/db/listener_test.cc +154 -27
  117. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -6
  118. package/deps/rocksdb/rocksdb/db/memtable.cc +197 -51
  119. package/deps/rocksdb/rocksdb/db/memtable.h +6 -0
  120. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +3 -4
  121. package/deps/rocksdb/rocksdb/db/merge_test.cc +37 -35
  122. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +2 -1
  123. package/deps/rocksdb/rocksdb/db/options_file_test.cc +4 -4
  124. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +9 -11
  125. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +10 -1
  126. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +292 -15
  127. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +10 -17
  128. package/deps/rocksdb/rocksdb/db/prefix_test.cc +6 -8
  129. package/deps/rocksdb/rocksdb/db/repair.cc +10 -10
  130. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -5
  131. package/deps/rocksdb/rocksdb/db/table_cache.cc +142 -135
  132. package/deps/rocksdb/rocksdb/db/table_cache.h +30 -6
  133. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -7
  134. package/deps/rocksdb/rocksdb/db/version_builder.cc +11 -50
  135. package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
  136. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +2 -1
  137. package/deps/rocksdb/rocksdb/db/version_edit.cc +51 -2
  138. package/deps/rocksdb/rocksdb/db/version_edit.h +91 -29
  139. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -7
  140. package/deps/rocksdb/rocksdb/db/version_set.cc +211 -50
  141. package/deps/rocksdb/rocksdb/db/version_set.h +40 -3
  142. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
  143. package/deps/rocksdb/rocksdb/db/version_set_test.cc +294 -21
  144. package/deps/rocksdb/rocksdb/db/version_util.cc +96 -0
  145. package/deps/rocksdb/rocksdb/db/version_util.h +24 -0
  146. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +5 -5
  147. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +647 -31
  148. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +219 -1
  149. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +549 -12
  150. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -3
  151. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +1 -1
  152. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -0
  153. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +21 -4
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +32 -0
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +74 -22
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +9 -0
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -61
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -2
  159. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +76 -2
  160. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +92 -72
  161. package/deps/rocksdb/rocksdb/env/env.cc +1 -0
  162. package/deps/rocksdb/rocksdb/env/env_test.cc +365 -2
  163. package/deps/rocksdb/rocksdb/env/fs_posix.cc +31 -30
  164. package/deps/rocksdb/rocksdb/env/io_posix.cc +8 -11
  165. package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
  166. package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
  167. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -1
  168. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +108 -0
  169. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +32 -4
  170. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +4 -4
  171. package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
  172. package/deps/rocksdb/rocksdb/file/file_util.h +2 -1
  173. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +331 -12
  174. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +52 -35
  175. package/deps/rocksdb/rocksdb/folly.mk +22 -5
  176. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +1 -1
  177. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +100 -54
  178. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +67 -2
  179. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +149 -13
  180. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -12
  181. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +78 -97
  182. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +3 -3
  183. package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +2 -2
  184. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +5 -0
  185. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +17 -2
  186. package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +1 -1
  187. package/deps/rocksdb/rocksdb/include/rocksdb/io_dispatcher.h +358 -0
  188. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +13 -0
  189. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +43 -0
  190. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +20 -0
  191. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +63 -21
  192. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +10 -1
  193. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +1 -1
  194. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +2 -7
  195. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +13 -0
  196. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -14
  197. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +49 -9
  198. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +8 -0
  199. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +77 -6
  200. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +15 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +16 -10
  202. package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +5 -5
  203. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +2 -4
  204. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +106 -46
  205. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +1 -1
  206. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +14 -1
  207. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +5 -1
  208. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +2 -1
  209. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -9
  210. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  211. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -2
  212. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +2 -2
  213. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +226 -8
  214. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +490 -0
  215. package/deps/rocksdb/rocksdb/memtable/skiplist.h +3 -3
  216. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +11 -0
  217. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +4 -12
  218. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +5 -5
  219. package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +21 -4
  220. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +9 -3
  221. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +21 -2
  222. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +2 -2
  223. package/deps/rocksdb/rocksdb/options/cf_options.cc +21 -1
  224. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  225. package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -2
  226. package/deps/rocksdb/rocksdb/options/db_options.cc +26 -5
  227. package/deps/rocksdb/rocksdb/options/db_options.h +3 -1
  228. package/deps/rocksdb/rocksdb/options/options.cc +5 -1
  229. package/deps/rocksdb/rocksdb/options/options_helper.cc +7 -2
  230. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +109 -103
  231. package/deps/rocksdb/rocksdb/options/options_test.cc +14 -0
  232. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +15 -17
  233. package/deps/rocksdb/rocksdb/port/lang.h +4 -0
  234. package/deps/rocksdb/rocksdb/port/port_example.h +0 -23
  235. package/deps/rocksdb/rocksdb/port/stack_trace.cc +36 -0
  236. package/deps/rocksdb/rocksdb/port/stack_trace.h +9 -0
  237. package/deps/rocksdb/rocksdb/src.mk +12 -0
  238. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +1 -2
  239. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  240. package/deps/rocksdb/rocksdb/table/block_based/block.cc +571 -292
  241. package/deps/rocksdb/rocksdb/table/block_based/block.h +143 -53
  242. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +154 -90
  243. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +5 -1
  244. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +51 -14
  245. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +0 -2
  246. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +147 -734
  247. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +30 -233
  248. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +178 -108
  249. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +13 -0
  250. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +17 -4
  251. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -2
  252. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +70 -0
  253. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +168 -24
  254. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -9
  255. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +7 -4
  256. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +9 -2
  257. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +548 -169
  258. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
  259. package/deps/rocksdb/rocksdb/table/block_based/block_util.h +156 -0
  260. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +73 -30
  261. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +74 -7
  262. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +1 -1
  263. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +20 -14
  264. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +22 -12
  265. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +1 -1
  266. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.cc +332 -0
  267. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.h +133 -0
  268. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -2
  269. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
  270. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +3 -2
  271. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +4 -1
  272. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +0 -1
  273. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +126 -46
  274. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +31 -3
  275. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +1 -2
  276. package/deps/rocksdb/rocksdb/table/cleanable_test.cc +3 -1
  277. package/deps/rocksdb/rocksdb/table/external_table.cc +25 -4
  278. package/deps/rocksdb/rocksdb/table/format.cc +27 -15
  279. package/deps/rocksdb/rocksdb/table/format.h +41 -15
  280. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -0
  281. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +22 -12
  282. package/deps/rocksdb/rocksdb/table/meta_blocks.h +0 -1
  283. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +7 -21
  284. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
  285. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +88 -13
  286. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +53 -42
  287. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +3 -12
  288. package/deps/rocksdb/rocksdb/table/table_builder.h +0 -4
  289. package/deps/rocksdb/rocksdb/table/table_properties.cc +18 -0
  290. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -3
  291. package/deps/rocksdb/rocksdb/table/table_test.cc +848 -172
  292. package/deps/rocksdb/rocksdb/table/unique_id.cc +24 -20
  293. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +8 -8
  294. package/deps/rocksdb/rocksdb/test_util/sync_point.h +5 -4
  295. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -1
  296. package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -2
  297. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +2 -1
  298. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +238 -120
  299. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +2 -2
  300. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +2 -4
  301. package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +4 -8
  302. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +1 -1
  303. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +2 -3
  304. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +82 -20
  305. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +41 -47
  306. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -0
  307. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +5 -6
  308. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +1 -1
  309. package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +6 -5
  310. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +4 -4
  311. package/deps/rocksdb/rocksdb/tools/write_stress.cc +1 -3
  312. package/deps/rocksdb/rocksdb/util/atomic.h +30 -23
  313. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +6 -7
  314. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +3 -3
  315. package/deps/rocksdb/rocksdb/util/bit_fields.h +68 -46
  316. package/deps/rocksdb/rocksdb/util/bloom_impl.h +16 -16
  317. package/deps/rocksdb/rocksdb/util/coding.h +14 -27
  318. package/deps/rocksdb/rocksdb/util/compression.cc +365 -207
  319. package/deps/rocksdb/rocksdb/util/compression.h +16 -1298
  320. package/deps/rocksdb/rocksdb/util/compression_test.cc +347 -61
  321. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +8 -9
  322. package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +1 -1
  323. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +1 -1
  324. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +3 -3
  325. package/deps/rocksdb/rocksdb/util/filter_bench.cc +18 -18
  326. package/deps/rocksdb/rocksdb/util/gflags_compat.h +3 -3
  327. package/deps/rocksdb/rocksdb/util/hash_test.cc +19 -7
  328. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.cc +1099 -0
  329. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.h +36 -0
  330. package/deps/rocksdb/rocksdb/util/io_dispatcher_test.cc +1919 -0
  331. package/deps/rocksdb/rocksdb/util/math.h +3 -1
  332. package/deps/rocksdb/rocksdb/util/mutexlock.h +19 -19
  333. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +25 -25
  334. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +5 -7
  335. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -5
  336. package/deps/rocksdb/rocksdb/util/slice.cc +0 -10
  337. package/deps/rocksdb/rocksdb/util/slice_test.cc +35 -1
  338. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +5 -7
  339. package/deps/rocksdb/rocksdb/util/status.cc +3 -1
  340. package/deps/rocksdb/rocksdb/util/stop_watch.h +2 -0
  341. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -1
  342. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +123 -78
  343. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +12 -93
  344. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -4
  345. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +0 -21
  346. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +6 -48
  347. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +94 -307
  348. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +12 -58
  349. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +2 -8
  350. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +2 -3
  351. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +205 -811
  352. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +18 -9
  353. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +2 -7
  354. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +1 -9
  355. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +17 -11
  356. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +1 -1
  357. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +1 -1
  358. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
  359. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +68 -61
  360. package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -1
  361. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +105 -59
  362. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -7
  363. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs_test.cc +94 -0
  364. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +13 -17
  365. package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +16 -3
  366. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +25 -25
  367. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +40 -40
  368. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +2 -5
  369. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +17 -19
  370. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +2 -2
  371. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +2 -2
  372. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +1 -1
  373. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +2 -2
  374. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +4 -13
  375. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +3 -3
  376. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
  377. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_seqno_test.cc +431 -0
  378. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -2
  379. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +91 -0
  380. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.cc +562 -0
  381. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.h +615 -0
  382. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.cc +2575 -0
  383. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.h +685 -0
  384. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_db_test.cc +2843 -0
  385. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.cc +567 -0
  386. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.h +275 -0
  387. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_test.cc +5183 -0
  388. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +4 -3
  389. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  390. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
  391. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +3 -3
  392. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +93 -88
  393. package/deps/rocksdb/rocksdb.gyp +7 -0
  394. package/iterator.js +2 -2
  395. package/package.json +1 -1
  396. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  397. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  398. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +0 -43
  399. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +0 -55
@@ -16,6 +16,7 @@
16
16
  #include "file/file_util.h"
17
17
  #include "file/filename.h"
18
18
  #include "file/random_access_file_reader.h"
19
+ #include "monitoring/file_read_sample.h"
19
20
  #include "monitoring/perf_context_imp.h"
20
21
  #include "rocksdb/advanced_options.h"
21
22
  #include "rocksdb/statistics.h"
@@ -72,6 +73,8 @@ TableCache::TableCache(const ImmutableOptions& ioptions,
72
73
  file_options_(*file_options),
73
74
  cache_(cache),
74
75
  immortal_tables_(false),
76
+ should_pin_table_handles_(cache_.get()->GetCapacity() >=
77
+ kInfiniteCapacity),
75
78
  block_cache_tracer_(block_cache_tracer),
76
79
  loader_mutex_(kLoadConcurency),
77
80
  io_tracer_(io_tracer),
@@ -98,6 +101,8 @@ Status TableCache::GetTableReader(
98
101
  std::unique_ptr<FSRandomAccessFile> file;
99
102
  FileOptions fopts = file_options;
100
103
  fopts.temperature = file_temperature;
104
+ fopts.file_checksum = file_meta.file_checksum;
105
+ fopts.file_checksum_func_name = file_meta.file_checksum_func_name;
101
106
  Status s = PrepareIOFromReadOptions(ro, ioptions_.clock, fopts.io_options);
102
107
  TEST_SYNC_POINT_CALLBACK("TableCache::GetTableReader:BeforeOpenFile",
103
108
  const_cast<Status*>(&s));
@@ -113,8 +118,7 @@ Status TableCache::GetTableReader(
113
118
  Status temp_s =
114
119
  PrepareIOFromReadOptions(ro, ioptions_.clock, fopts.io_options);
115
120
  if (temp_s.ok()) {
116
- temp_s = ioptions_.fs->NewRandomAccessFile(fname, file_options, &file,
117
- nullptr);
121
+ temp_s = ioptions_.fs->NewRandomAccessFile(fname, fopts, &file, nullptr);
118
122
  }
119
123
  if (temp_s.ok()) {
120
124
  RecordTick(ioptions_.stats, NO_FILE_OPENS);
@@ -168,15 +172,29 @@ Cache::Handle* TableCache::Lookup(Cache* cache, uint64_t file_number) {
168
172
  return cache->Lookup(key);
169
173
  }
170
174
 
175
+ // TODO: consider making handle RAII.
171
176
  Status TableCache::FindTable(
172
177
  const ReadOptions& ro, const FileOptions& file_options,
173
178
  const InternalKeyComparator& internal_comparator,
174
179
  const FileMetaData& file_meta, TypedHandle** handle,
175
- const MutableCFOptions& mutable_cf_options, const bool no_io,
176
- HistogramImpl* file_read_hist, bool skip_filters, int level,
177
- bool prefetch_index_and_filter_in_cache,
178
- size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) {
180
+ const MutableCFOptions& mutable_cf_options, TableReader** out_table_reader,
181
+ const bool no_io, HistogramImpl* file_read_hist, bool skip_filters,
182
+ int level, bool prefetch_index_and_filter_in_cache,
183
+ size_t max_file_size_for_l0_meta_pin, Temperature file_temperature,
184
+ bool pin_table_handle) {
185
+ assert(out_table_reader != nullptr && *out_table_reader == nullptr);
186
+ assert(handle != nullptr && *handle == nullptr);
179
187
  PERF_TIMER_GUARD_WITH_CLOCK(find_table_nanos, ioptions_.clock);
188
+
189
+ // Fast path: if table reader is already pinned, return it directly without a
190
+ // cache lookup.
191
+ auto pinned_reader = file_meta.fd.pinned_reader.Get();
192
+ if (pinned_reader != nullptr) {
193
+ *handle = nullptr;
194
+ *out_table_reader = pinned_reader;
195
+ return Status::OK();
196
+ }
197
+
180
198
  uint64_t number = file_meta.fd.GetNumber();
181
199
  // NOTE: sharing same Cache with BlobFileCache
182
200
  Slice key = GetSliceForFileNumber(&number);
@@ -184,39 +202,71 @@ Status TableCache::FindTable(
184
202
  TEST_SYNC_POINT_CALLBACK("TableCache::FindTable:0",
185
203
  const_cast<bool*>(&no_io));
186
204
 
205
+ Status s = Status::OK();
187
206
  if (*handle == nullptr) {
188
207
  if (no_io) {
189
- return Status::Incomplete("Table not found in table_cache, no_io is set");
208
+ s = Status::Incomplete("Table not found in table_cache, no_io is set");
209
+ return s;
190
210
  }
191
211
  MutexLock load_lock(&loader_mutex_.Get(key));
212
+
213
+ // Check if another thread has already pinned the table reader
214
+ pinned_reader = file_meta.fd.pinned_reader.Get();
215
+ if (pinned_reader != nullptr) {
216
+ *handle = nullptr;
217
+ *out_table_reader = pinned_reader;
218
+ return s;
219
+ }
220
+
192
221
  // We check the cache again under loading mutex
193
222
  *handle = cache_.Lookup(key);
194
- if (*handle != nullptr) {
195
- return Status::OK();
223
+ if (*handle == nullptr) {
224
+ std::unique_ptr<TableReader> table_reader;
225
+ s = GetTableReader(ro, file_options, internal_comparator, file_meta,
226
+ false /* sequential mode */, file_read_hist,
227
+ &table_reader, mutable_cf_options, skip_filters, level,
228
+ prefetch_index_and_filter_in_cache,
229
+ max_file_size_for_l0_meta_pin, file_temperature);
230
+ if (!s.ok()) {
231
+ assert(table_reader == nullptr);
232
+ RecordTick(ioptions_.stats, NO_FILE_ERRORS);
233
+ // We do not cache error results so that if the error is transient,
234
+ // or somebody repairs the file, we recover automatically.
235
+ IGNORE_STATUS_IF_ERROR(s);
236
+ } else {
237
+ s = cache_.Insert(key, table_reader.get(), 1, handle);
238
+ if (s.ok()) {
239
+ // Release ownership of table reader.
240
+ (void)table_reader.release();
241
+ }
242
+ }
196
243
  }
197
244
 
198
- std::unique_ptr<TableReader> table_reader;
199
- Status s = GetTableReader(ro, file_options, internal_comparator, file_meta,
200
- false /* sequential mode */, file_read_hist,
201
- &table_reader, mutable_cf_options, skip_filters,
202
- level, prefetch_index_and_filter_in_cache,
203
- max_file_size_for_l0_meta_pin, file_temperature);
204
- if (!s.ok()) {
205
- assert(table_reader == nullptr);
206
- RecordTick(ioptions_.stats, NO_FILE_ERRORS);
207
- // We do not cache error results so that if the error is transient,
208
- // or somebody repairs the file, we recover automatically.
209
- IGNORE_STATUS_IF_ERROR(s);
210
- } else {
211
- s = cache_.Insert(key, table_reader.get(), 1, handle);
212
- if (s.ok()) {
213
- // Release ownership of table reader.
214
- table_reader.release();
245
+ if (s.ok()) {
246
+ *out_table_reader = cache_.Value(*handle);
247
+ if (pin_table_handle) {
248
+ file_meta.fd.pinned_reader.Pin(*handle, *out_table_reader);
249
+ *handle = nullptr;
215
250
  }
216
251
  }
217
- return s;
252
+ } else {
253
+ *out_table_reader = cache_.Value(*handle);
254
+ if (pin_table_handle) {
255
+ // handle is in cache but not pinned. This should happen fairly rarely,
256
+ // and once the reader is pinned, we will no longer need to go through
257
+ // these mutexes again.
258
+ MutexLock load_lock(&loader_mutex_.Get(key));
259
+ if (file_meta.fd.pinned_reader.Get() != nullptr) {
260
+ // Another thread has pinned the handle; release our lookup ref.
261
+ cache_.Release(*handle);
262
+ } else {
263
+ file_meta.fd.pinned_reader.Pin(*handle, *out_table_reader);
264
+ }
265
+ *handle = nullptr;
266
+ }
218
267
  }
219
- return Status::OK();
268
+
269
+ return s;
220
270
  }
221
271
 
222
272
  InternalIterator* TableCache::NewIterator(
@@ -229,7 +279,8 @@ InternalIterator* TableCache::NewIterator(
229
279
  const InternalKey* smallest_compaction_key,
230
280
  const InternalKey* largest_compaction_key, bool allow_unprepared_value,
231
281
  const SequenceNumber* read_seqno,
232
- std::unique_ptr<TruncatedRangeDelIterator>* range_del_iter) {
282
+ std::unique_ptr<TruncatedRangeDelIterator>* range_del_iter,
283
+ bool maybe_pin_table_handle) {
233
284
  PERF_TIMER_GUARD(new_table_iterator_nanos);
234
285
 
235
286
  Status s;
@@ -239,19 +290,15 @@ InternalIterator* TableCache::NewIterator(
239
290
  *table_reader_ptr = nullptr;
240
291
  }
241
292
  bool for_compaction = caller == TableReaderCaller::kCompaction;
242
- auto& fd = file_meta.fd;
243
- table_reader = fd.table_reader;
244
- if (table_reader == nullptr) {
245
- s = FindTable(options, file_options, icomparator, file_meta, &handle,
246
- mutable_cf_options,
247
- options.read_tier == kBlockCacheTier /* no_io */,
248
- file_read_hist, skip_filters, level,
249
- true /* prefetch_index_and_filter_in_cache */,
250
- max_file_size_for_l0_meta_pin, file_meta.temperature);
251
- if (s.ok()) {
252
- table_reader = cache_.Value(handle);
253
- }
254
- }
293
+ TEST_SYNC_POINT_CALLBACK("TableCache::NewIterator::BeforeFindTable",
294
+ const_cast<FileDescriptor*>(&file_meta.fd));
295
+ s = FindTable(options, file_options, icomparator, file_meta, &handle,
296
+ mutable_cf_options, &table_reader,
297
+ options.read_tier == kBlockCacheTier /* no_io */,
298
+ file_read_hist, skip_filters, level,
299
+ true /* prefetch_index_and_filter_in_cache */,
300
+ max_file_size_for_l0_meta_pin, file_meta.temperature,
301
+ maybe_pin_table_handle && should_pin_table_handles_);
255
302
  InternalIterator* result = nullptr;
256
303
  if (s.ok()) {
257
304
  if (options.table_filter &&
@@ -292,7 +339,7 @@ InternalIterator* TableCache::NewIterator(
292
339
  }
293
340
  }
294
341
  if (range_del_agg != nullptr) {
295
- if (range_del_agg->AddFile(fd.GetNumber())) {
342
+ if (range_del_agg->AddFile(file_meta.fd.GetNumber())) {
296
343
  std::unique_ptr<FragmentedRangeTombstoneIterator> new_range_del_iter(
297
344
  static_cast<FragmentedRangeTombstoneIterator*>(
298
345
  table_reader->NewRangeTombstoneIterator(options)));
@@ -331,17 +378,11 @@ Status TableCache::GetRangeTombstoneIterator(
331
378
  const FileMetaData& file_meta, const MutableCFOptions& mutable_cf_options,
332
379
  std::unique_ptr<FragmentedRangeTombstoneIterator>* out_iter) {
333
380
  assert(out_iter);
334
- const FileDescriptor& fd = file_meta.fd;
335
381
  Status s;
336
- TableReader* t = fd.table_reader;
382
+ TableReader* t = nullptr;
337
383
  TypedHandle* handle = nullptr;
338
- if (t == nullptr) {
339
- s = FindTable(options, file_options_, internal_comparator, file_meta,
340
- &handle, mutable_cf_options);
341
- if (s.ok()) {
342
- t = cache_.Value(handle);
343
- }
344
- }
384
+ s = FindTable(options, file_options_, internal_comparator, file_meta, &handle,
385
+ mutable_cf_options, &t);
345
386
  if (s.ok()) {
346
387
  // Note: NewRangeTombstoneIterator could return nullptr
347
388
  out_iter->reset(t->NewRangeTombstoneIterator(options));
@@ -455,20 +496,18 @@ Status TableCache::Get(const ReadOptions& options,
455
496
  row_cache_entry = &row_cache_entry_buffer;
456
497
  }
457
498
  }
458
- TableReader* t = fd.table_reader;
499
+ TEST_SYNC_POINT_CALLBACK("TableCache::Get::BeforeFindTable",
500
+ const_cast<FileDescriptor*>(&fd));
501
+ TableReader* t = nullptr;
459
502
  TypedHandle* handle = nullptr;
460
503
  if (s.ok() && !done) {
461
- if (t == nullptr) {
462
- s = FindTable(options, file_options_, internal_comparator, file_meta,
463
- &handle, mutable_cf_options,
464
- options.read_tier == kBlockCacheTier /* no_io */,
465
- file_read_hist, skip_filters, level,
466
- true /* prefetch_index_and_filter_in_cache */,
467
- max_file_size_for_l0_meta_pin, file_meta.temperature);
468
- if (s.ok()) {
469
- t = cache_.Value(handle);
470
- }
471
- }
504
+ s = FindTable(options, file_options_, internal_comparator, file_meta,
505
+ &handle, mutable_cf_options, &t,
506
+ options.read_tier == kBlockCacheTier /* no_io */,
507
+ file_read_hist, skip_filters, level,
508
+ true /* prefetch_index_and_filter_in_cache */,
509
+ max_file_size_for_l0_meta_pin, file_meta.temperature,
510
+ should_pin_table_handles_);
472
511
  SequenceNumber* max_covering_tombstone_seq =
473
512
  get_context->max_covering_tombstone_seq();
474
513
  if (s.ok() && max_covering_tombstone_seq != nullptr &&
@@ -545,8 +584,8 @@ Status TableCache::MultiGetFilter(
545
584
  const InternalKeyComparator& internal_comparator,
546
585
  const FileMetaData& file_meta, const MutableCFOptions& mutable_cf_options,
547
586
  HistogramImpl* file_read_hist, int level,
548
- MultiGetContext::Range* mget_range, TypedHandle** table_handle) {
549
- auto& fd = file_meta.fd;
587
+ MultiGetContext::Range* mget_range, TypedHandle** handle) {
588
+ assert(*handle == nullptr);
550
589
  IterKey row_cache_key;
551
590
  std::string row_cache_entry_buffer;
552
591
 
@@ -558,23 +597,17 @@ Status TableCache::MultiGetFilter(
558
597
  return Status::NotSupported();
559
598
  }
560
599
  Status s;
561
- TableReader* t = fd.table_reader;
562
- TypedHandle* handle = nullptr;
600
+ TableReader* t = nullptr;
563
601
  MultiGetContext::Range tombstone_range(*mget_range, mget_range->begin(),
564
602
  mget_range->end());
565
- if (t == nullptr) {
566
- s = FindTable(options, file_options_, internal_comparator, file_meta,
567
- &handle, mutable_cf_options,
568
- options.read_tier == kBlockCacheTier /* no_io */,
569
- file_read_hist,
570
- /*skip_filters=*/false, level,
571
- true /* prefetch_index_and_filter_in_cache */,
572
- /*max_file_size_for_l0_meta_pin=*/0, file_meta.temperature);
573
- if (s.ok()) {
574
- t = cache_.Value(handle);
575
- }
576
- *table_handle = handle;
577
- }
603
+ s = FindTable(options, file_options_, internal_comparator, file_meta, handle,
604
+ mutable_cf_options, &t,
605
+ options.read_tier == kBlockCacheTier /* no_io */,
606
+ file_read_hist,
607
+ /*skip_filters=*/false, level,
608
+ true /* prefetch_index_and_filter_in_cache */,
609
+ /*max_file_size_for_l0_meta_pin=*/0, file_meta.temperature,
610
+ should_pin_table_handles_);
578
611
  if (s.ok()) {
579
612
  s = t->MultiGetFilter(options, mutable_cf_options.prefix_extractor.get(),
580
613
  mget_range);
@@ -585,9 +618,9 @@ Status TableCache::MultiGetFilter(
585
618
  // is, it may be called with fewer keys in the rangedue to filtering.
586
619
  UpdateRangeTombstoneSeqnums(options, t, tombstone_range);
587
620
  }
588
- if (mget_range->empty() && handle) {
589
- cache_.Release(handle);
590
- *table_handle = nullptr;
621
+ if (mget_range->empty() && *handle) {
622
+ cache_.Release(*handle);
623
+ *handle = nullptr;
591
624
  }
592
625
 
593
626
  return s;
@@ -599,24 +632,18 @@ Status TableCache::GetTableProperties(
599
632
  const FileMetaData& file_meta,
600
633
  std::shared_ptr<const TableProperties>* properties,
601
634
  const MutableCFOptions& mutable_cf_options, bool no_io) {
602
- auto table_reader = file_meta.fd.table_reader;
603
- // table already been pre-loaded?
604
- if (table_reader) {
605
- *properties = table_reader->GetTableProperties();
606
-
607
- return Status::OK();
608
- }
609
-
610
635
  TypedHandle* table_handle = nullptr;
611
- Status s = FindTable(read_options, file_options, internal_comparator,
612
- file_meta, &table_handle, mutable_cf_options, no_io);
636
+ TableReader* table = nullptr;
637
+ Status s =
638
+ FindTable(read_options, file_options, internal_comparator, file_meta,
639
+ &table_handle, mutable_cf_options, &table, no_io);
613
640
  if (!s.ok()) {
614
641
  return s;
615
642
  }
616
- assert(table_handle);
617
- auto table = cache_.Value(table_handle);
618
643
  *properties = table->GetTableProperties();
619
- cache_.Release(table_handle);
644
+ if (table_handle) {
645
+ cache_.Release(table_handle);
646
+ }
620
647
  return s;
621
648
  }
622
649
 
@@ -626,15 +653,10 @@ Status TableCache::ApproximateKeyAnchors(
626
653
 
627
654
  std::vector<TableReader::Anchor>& anchors) {
628
655
  Status s;
629
- TableReader* t = file_meta.fd.table_reader;
656
+ TableReader* t = nullptr;
630
657
  TypedHandle* handle = nullptr;
631
- if (t == nullptr) {
632
- s = FindTable(ro, file_options_, internal_comparator, file_meta, &handle,
633
- mutable_cf_options);
634
- if (s.ok()) {
635
- t = cache_.Value(handle);
636
- }
637
- }
658
+ s = FindTable(ro, file_options_, internal_comparator, file_meta, &handle,
659
+ mutable_cf_options, &t);
638
660
  if (s.ok() && t != nullptr) {
639
661
  s = t->ApproximateKeyAnchors(ro, anchors);
640
662
  }
@@ -648,23 +670,18 @@ size_t TableCache::GetMemoryUsageByTableReader(
648
670
  const FileOptions& file_options, const ReadOptions& read_options,
649
671
  const InternalKeyComparator& internal_comparator,
650
672
  const FileMetaData& file_meta, const MutableCFOptions& mutable_cf_options) {
651
- auto table_reader = file_meta.fd.table_reader;
652
- // table already been pre-loaded?
653
- if (table_reader) {
654
- return table_reader->ApproximateMemoryUsage();
655
- }
656
-
657
673
  TypedHandle* table_handle = nullptr;
674
+ TableReader* table = nullptr;
658
675
  Status s =
659
676
  FindTable(read_options, file_options, internal_comparator, file_meta,
660
- &table_handle, mutable_cf_options, true /* no_io */);
677
+ &table_handle, mutable_cf_options, &table, true /* no_io */);
661
678
  if (!s.ok()) {
662
679
  return 0;
663
680
  }
664
- assert(table_handle);
665
- auto table = cache_.Value(table_handle);
666
681
  auto ret = table->ApproximateMemoryUsage();
667
- cache_.Release(table_handle);
682
+ if (table_handle) {
683
+ cache_.Release(table_handle);
684
+ }
668
685
  return ret;
669
686
  }
670
687
 
@@ -678,18 +695,13 @@ uint64_t TableCache::ApproximateOffsetOf(
678
695
  const InternalKeyComparator& internal_comparator,
679
696
  const MutableCFOptions& mutable_cf_options) {
680
697
  uint64_t result = 0;
681
- TableReader* table_reader = file_meta.fd.table_reader;
698
+ TableReader* table_reader = nullptr;
682
699
  TypedHandle* table_handle = nullptr;
683
- if (table_reader == nullptr) {
684
- Status s =
685
- FindTable(read_options, file_options_, internal_comparator, file_meta,
686
- &table_handle, mutable_cf_options, false /* no_io */);
687
- if (s.ok()) {
688
- table_reader = cache_.Value(table_handle);
689
- }
690
- }
700
+ Status s =
701
+ FindTable(read_options, file_options_, internal_comparator, file_meta,
702
+ &table_handle, mutable_cf_options, &table_reader);
691
703
 
692
- if (table_reader != nullptr) {
704
+ if (s.ok() && table_reader != nullptr) {
693
705
  result = table_reader->ApproximateOffsetOf(read_options, key, caller);
694
706
  }
695
707
  if (table_handle != nullptr) {
@@ -705,18 +717,13 @@ uint64_t TableCache::ApproximateSize(
705
717
  const InternalKeyComparator& internal_comparator,
706
718
  const MutableCFOptions& mutable_cf_options) {
707
719
  uint64_t result = 0;
708
- TableReader* table_reader = file_meta.fd.table_reader;
720
+ TableReader* table_reader = nullptr;
709
721
  TypedHandle* table_handle = nullptr;
710
- if (table_reader == nullptr) {
711
- Status s =
712
- FindTable(read_options, file_options_, internal_comparator, file_meta,
713
- &table_handle, mutable_cf_options, false /* no_io */);
714
- if (s.ok()) {
715
- table_reader = cache_.Value(table_handle);
716
- }
717
- }
722
+ Status s =
723
+ FindTable(read_options, file_options_, internal_comparator, file_meta,
724
+ &table_handle, mutable_cf_options, &table_reader);
718
725
 
719
- if (table_reader != nullptr) {
726
+ if (s.ok() && table_reader != nullptr) {
720
727
  result = table_reader->ApproximateSize(read_options, start, end, caller);
721
728
  }
722
729
  if (table_handle != nullptr) {
@@ -99,7 +99,8 @@ class TableCache {
99
99
  const InternalKey* smallest_compaction_key,
100
100
  const InternalKey* largest_compaction_key, bool allow_unprepared_value,
101
101
  const SequenceNumber* range_del_read_seqno = nullptr,
102
- std::unique_ptr<TruncatedRangeDelIterator>* range_del_iter = nullptr);
102
+ std::unique_ptr<TruncatedRangeDelIterator>* range_del_iter = nullptr,
103
+ bool maybe_pin_table_handle = false);
103
104
 
104
105
  // If a seek to internal key "k" in specified file finds an entry,
105
106
  // call get_context->SaveValue() repeatedly until
@@ -172,19 +173,31 @@ class TableCache {
172
173
  // Return handle to an existing cache entry if there is one
173
174
  static Cache::Handle* Lookup(Cache* cache, uint64_t file_number);
174
175
 
175
- // Find table reader
176
- // @param skip_filters Disables loading/accessing the filter block
177
- // @param level == -1 means not specified
176
+ // Look up the TableReader for the given file in the cache, or open the file
177
+ // and create a new TableReader if not cached. On success, sets *table_reader
178
+ // to point to the TableReader (owned by the cache) and *handle to the cache
179
+ // handle (caller must release via cache_.Release() unless pin_table_handle is
180
+ // true). If the table reader is already pinned on file_meta, returns it
181
+ // directly without a cache lookup.
182
+ //
183
+ // @param no_io If true, returns Status::Incomplete() when the table is not
184
+ // already in cache rather than reading from disk.
185
+ // @param skip_filters Disables loading/accessing the filter block.
186
+ // @param level The LSM level of this table, -1 if not specified.
187
+ // @param pin_table_handle If true, pins the table reader on file_meta so
188
+ // future lookups bypass the cache. *handle is set to nullptr
189
+ // on return in this case.
178
190
  Status FindTable(const ReadOptions& ro, const FileOptions& toptions,
179
191
  const InternalKeyComparator& internal_comparator,
180
192
  const FileMetaData& file_meta, TypedHandle**,
181
193
  const MutableCFOptions& mutable_cf_options,
182
- const bool no_io = false,
194
+ TableReader** table_reader, const bool no_io = false,
183
195
  HistogramImpl* file_read_hist = nullptr,
184
196
  bool skip_filters = false, int level = -1,
185
197
  bool prefetch_index_and_filter_in_cache = true,
186
198
  size_t max_file_size_for_l0_meta_pin = 0,
187
- Temperature file_temperature = Temperature::kUnknown);
199
+ Temperature file_temperature = Temperature::kUnknown,
200
+ bool pin_table_handle = false);
188
201
 
189
202
  // Get the table properties of a given table.
190
203
  // @no_io: indicates if we should load table to the cache if it is not present
@@ -231,6 +244,8 @@ class TableCache {
231
244
 
232
245
  CacheInterface& get_cache() { return cache_; }
233
246
 
247
+ const FileOptions& file_options() const { return file_options_; }
248
+
234
249
  // Capacity of the backing Cache that indicates infinite TableCache capacity.
235
250
  // For example when max_open_files is -1 we set the backing Cache to this.
236
251
  static const int kInfiniteCapacity = 0x400000;
@@ -243,6 +258,14 @@ class TableCache {
243
258
  }
244
259
  }
245
260
 
261
+ // Re-evaluates should_pin_table_handles_ from the current cache capacity.
262
+ // Must be called after the underlying cache capacity changes (e.g. via
263
+ // SetDBOptions changing max_open_files).
264
+ void UpdateShouldPinTableHandles() {
265
+ should_pin_table_handles_ =
266
+ cache_.get()->GetCapacity() >= kInfiniteCapacity;
267
+ }
268
+
246
269
  private:
247
270
  // Build a table reader
248
271
  Status GetTableReader(const ReadOptions& ro, const FileOptions& file_options,
@@ -283,6 +306,7 @@ class TableCache {
283
306
  CacheInterface cache_;
284
307
  std::string row_cache_id_;
285
308
  bool immortal_tables_;
309
+ bool should_pin_table_handles_;
286
310
  BlockCacheTracer* const block_cache_tracer_;
287
311
  Striped<CacheAlignedWrapper<port::Mutex>> loader_mutex_;
288
312
  std::shared_ptr<IOTracer> io_tracer_;
@@ -21,7 +21,9 @@ DEFINE_SYNC_AND_ASYNC(Status, TableCache::MultiGet)
21
21
  bool skip_filters, bool skip_range_deletions, int level, TypedHandle* handle) {
22
22
  auto& fd = file_meta.fd;
23
23
  Status s;
24
- TableReader* t = fd.table_reader;
24
+ TEST_SYNC_POINT_CALLBACK("TableCache::MultiGet::BeforeFindTable",
25
+ const_cast<FileDescriptor*>(&fd));
26
+ TableReader* t = fd.pinned_reader.Get();
25
27
  MultiGetRange table_range(*mget_range, mget_range->begin(),
26
28
  mget_range->end());
27
29
  if (handle != nullptr && t == nullptr) {
@@ -70,16 +72,14 @@ DEFINE_SYNC_AND_ASYNC(Status, TableCache::MultiGet)
70
72
  if (t == nullptr) {
71
73
  assert(handle == nullptr);
72
74
  s = FindTable(options, file_options_, internal_comparator, file_meta,
73
- &handle, mutable_cf_options,
75
+ &handle, mutable_cf_options, &t,
74
76
  options.read_tier == kBlockCacheTier /* no_io */,
75
77
  file_read_hist, skip_filters, level,
76
78
  true /* prefetch_index_and_filter_in_cache */,
77
- 0 /*max_file_size_for_l0_meta_pin*/, file_meta.temperature);
79
+ 0 /*max_file_size_for_l0_meta_pin*/, file_meta.temperature,
80
+ should_pin_table_handles_);
78
81
  TEST_SYNC_POINT_CALLBACK("TableCache::MultiGet:FindTable", &s);
79
- if (s.ok()) {
80
- t = cache_.Value(handle);
81
- assert(t);
82
- }
82
+ assert(!s.ok() || t);
83
83
  }
84
84
  if (s.ok() && !options.ignore_range_deletions && !skip_range_deletions) {
85
85
  UpdateRangeTombstoneSeqnums(options, t, table_range);
@@ -32,8 +32,10 @@
32
32
  #include "db/version_edit.h"
33
33
  #include "db/version_edit_handler.h"
34
34
  #include "db/version_set.h"
35
+ #include "db/version_util.h"
35
36
  #include "port/port.h"
36
37
  #include "table/table_reader.h"
38
+ #include "test_util/sync_point.h"
37
39
  #include "util/string_util.h"
38
40
 
39
41
  namespace ROCKSDB_NAMESPACE {
@@ -430,12 +432,11 @@ class VersionBuilder::Rep {
430
432
  void UnrefFile(FileMetaData* f) {
431
433
  f->refs--;
432
434
  if (f->refs <= 0) {
433
- if (f->table_reader_handle) {
435
+ if (f->fd.pinned_reader.Get() != nullptr) {
434
436
  assert(table_cache_ != nullptr);
435
437
  // NOTE: have to release in raw cache interface to avoid using a
436
- // TypedHandle for FileMetaData::table_reader_handle
437
- table_cache_->get_cache().get()->Release(f->table_reader_handle);
438
- f->table_reader_handle = nullptr;
438
+ // TypedHandle for PinnedTableReader's internal handle
439
+ f->fd.pinned_reader.Release(table_cache_->get_cache().get());
439
440
  }
440
441
 
441
442
  if (file_metadata_cache_res_mgr_) {
@@ -1680,7 +1681,6 @@ class VersionBuilder::Rep {
1680
1681
 
1681
1682
  // <file metadata, level>
1682
1683
  std::vector<std::pair<FileMetaData*, int>> files_meta;
1683
- std::vector<Status> statuses;
1684
1684
  for (int level = 0; level < num_levels_; level++) {
1685
1685
  for (auto& file_meta_pair : levels_[level].added_files) {
1686
1686
  auto* file_meta = file_meta_pair.second;
@@ -1690,9 +1690,8 @@ class VersionBuilder::Rep {
1690
1690
  continue;
1691
1691
  }
1692
1692
  // If the file has been opened before, just skip it.
1693
- if (!file_meta->table_reader_handle) {
1693
+ if (file_meta->fd.pinned_reader.Get() == nullptr) {
1694
1694
  files_meta.emplace_back(file_meta, level);
1695
- statuses.emplace_back(Status::OK());
1696
1695
  }
1697
1696
  if (files_meta.size() >= max_load) {
1698
1697
  break;
@@ -1703,49 +1702,11 @@ class VersionBuilder::Rep {
1703
1702
  }
1704
1703
  }
1705
1704
 
1706
- std::atomic<size_t> next_file_meta_idx(0);
1707
- std::function<void()> load_handlers_func([&]() {
1708
- while (true) {
1709
- size_t file_idx = next_file_meta_idx.fetch_add(1);
1710
- if (file_idx >= files_meta.size()) {
1711
- break;
1712
- }
1713
-
1714
- auto* file_meta = files_meta[file_idx].first;
1715
- int level = files_meta[file_idx].second;
1716
- TableCache::TypedHandle* handle = nullptr;
1717
- statuses[file_idx] = table_cache_->FindTable(
1718
- read_options, file_options_,
1719
- *(base_vstorage_->InternalComparator()), *file_meta, &handle,
1720
- mutable_cf_options, false /*no_io */,
1721
- internal_stats->GetFileReadHist(level), false, level,
1722
- prefetch_index_and_filter_in_cache, max_file_size_for_l0_meta_pin,
1723
- file_meta->temperature);
1724
- if (handle != nullptr) {
1725
- file_meta->table_reader_handle = handle;
1726
- // Load table_reader
1727
- file_meta->fd.table_reader = table_cache_->get_cache().Value(handle);
1728
- }
1729
- }
1730
- });
1731
-
1732
- std::vector<port::Thread> threads;
1733
- for (int i = 1; i < max_threads; i++) {
1734
- threads.emplace_back(load_handlers_func);
1735
- }
1736
- load_handlers_func();
1737
- for (auto& t : threads) {
1738
- t.join();
1739
- }
1740
- Status ret;
1741
- for (const auto& s : statuses) {
1742
- if (!s.ok()) {
1743
- if (ret.ok()) {
1744
- ret = s;
1745
- }
1746
- }
1747
- }
1748
- return ret;
1705
+ return LoadTableHandlersHelper(
1706
+ files_meta, table_cache_, file_options_,
1707
+ *base_vstorage_->InternalComparator(), internal_stats, max_threads,
1708
+ prefetch_index_and_filter_in_cache, mutable_cf_options,
1709
+ max_file_size_for_l0_meta_pin, read_options);
1749
1710
  }
1750
1711
  };
1751
1712
 
@@ -53,7 +53,8 @@ class VersionBuilder {
53
53
  // Save the current Version to the provided `vstorage`.
54
54
  Status SaveTo(VersionStorageInfo* vstorage) const;
55
55
 
56
- // Load all the table handlers for the current Version in the builder.
56
+ // Load table handlers for newly added files in the builder. This does not
57
+ // load any files in the base storage.
57
58
  Status LoadTableHandlers(InternalStats* internal_stats, int max_threads,
58
59
  bool prefetch_index_and_filter_in_cache,
59
60
  bool is_initial_load,