@nxtedition/rocksdb 15.4.1 → 15.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (399)
  1. package/binding.cc +24 -15
  2. package/deps/rocksdb/rocksdb/.clang-tidy +86 -0
  3. package/deps/rocksdb/rocksdb/BUCK +42 -0
  4. package/deps/rocksdb/rocksdb/CMakeLists.txt +11 -0
  5. package/deps/rocksdb/rocksdb/Makefile +59 -32
  6. package/deps/rocksdb/rocksdb/cache/cache.cc +0 -5
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +9 -9
  8. package/deps/rocksdb/rocksdb/cache/cache_key.cc +3 -3
  9. package/deps/rocksdb/rocksdb/cache/cache_key.h +5 -5
  10. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +16 -16
  11. package/deps/rocksdb/rocksdb/cache/cache_test.cc +1 -1
  12. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +258 -294
  13. package/deps/rocksdb/rocksdb/cache/clock_cache.h +98 -49
  14. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -5
  15. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +2 -3
  16. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +18 -18
  17. package/deps/rocksdb/rocksdb/crash_test.mk +5 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -22
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +6 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +14 -16
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +38 -26
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +101 -18
  24. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +12 -0
  25. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +6 -9
  26. package/deps/rocksdb/rocksdb/db/builder.cc +23 -0
  27. package/deps/rocksdb/rocksdb/db/builder.h +7 -0
  28. package/deps/rocksdb/rocksdb/db/c.cc +373 -57
  29. package/deps/rocksdb/rocksdb/db/c_test.c +101 -1
  30. package/deps/rocksdb/rocksdb/db/column_family.cc +31 -3
  31. package/deps/rocksdb/rocksdb/db/column_family_test.cc +10 -13
  32. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +35 -48
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +13 -5
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +201 -39
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -10
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +7 -7
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -455
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +19 -0
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +72 -9
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -10
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +405 -83
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +25 -1
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -10
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1410 -106
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -5
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +2 -1
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +19 -10
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +505 -45
  51. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
  52. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +9 -1
  53. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +4 -4
  54. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +7 -9
  55. package/deps/rocksdb/rocksdb/db/convenience.cc +4 -4
  56. package/deps/rocksdb/rocksdb/db/convenience_impl.h +2 -1
  57. package/deps/rocksdb/rocksdb/db/corruption_test.cc +60 -88
  58. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +10 -12
  59. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +471 -40
  60. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +116 -2
  61. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +5 -15
  62. package/deps/rocksdb/rocksdb/db/db_compaction_abort_test.cc +993 -0
  63. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +329 -29
  64. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +155 -13
  65. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +54 -31
  66. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -0
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +232 -70
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +57 -9
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +224 -31
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +4 -2
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +164 -8
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -0
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +5 -0
  77. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +47 -35
  78. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +22 -9
  79. package/deps/rocksdb/rocksdb/db/db_iter.cc +9 -0
  80. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +371 -6
  81. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -5
  82. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +22 -23
  83. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +0 -2
  84. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +4 -4
  85. package/deps/rocksdb/rocksdb/db/db_options_test.cc +40 -0
  86. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +32 -13
  87. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1 -1
  88. package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +4 -4
  89. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +68 -15
  90. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1 -1
  91. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +2 -3
  92. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -21
  93. package/deps/rocksdb/rocksdb/db/db_test.cc +644 -128
  94. package/deps/rocksdb/rocksdb/db/db_test2.cc +198 -81
  95. package/deps/rocksdb/rocksdb/db/db_test_util.cc +35 -10
  96. package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -2
  97. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +36 -32
  98. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +11 -7
  99. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +499 -0
  100. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +284 -20
  101. package/deps/rocksdb/rocksdb/db/db_write_test.cc +3 -3
  102. package/deps/rocksdb/rocksdb/db/dbformat.h +0 -5
  103. package/deps/rocksdb/rocksdb/db/error_handler.cc +24 -0
  104. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +12 -14
  105. package/deps/rocksdb/rocksdb/db/experimental.cc +13 -10
  106. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -1
  107. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +22 -3
  108. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +21 -15
  109. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +4 -6
  110. package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -3
  111. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +5 -6
  112. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +4 -2
  113. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +17 -17
  114. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -0
  115. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  116. package/deps/rocksdb/rocksdb/db/listener_test.cc +154 -27
  117. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -6
  118. package/deps/rocksdb/rocksdb/db/memtable.cc +197 -51
  119. package/deps/rocksdb/rocksdb/db/memtable.h +6 -0
  120. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +3 -4
  121. package/deps/rocksdb/rocksdb/db/merge_test.cc +37 -35
  122. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +2 -1
  123. package/deps/rocksdb/rocksdb/db/options_file_test.cc +4 -4
  124. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +9 -11
  125. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +10 -1
  126. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +292 -15
  127. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +10 -17
  128. package/deps/rocksdb/rocksdb/db/prefix_test.cc +6 -8
  129. package/deps/rocksdb/rocksdb/db/repair.cc +10 -10
  130. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -5
  131. package/deps/rocksdb/rocksdb/db/table_cache.cc +142 -135
  132. package/deps/rocksdb/rocksdb/db/table_cache.h +30 -6
  133. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -7
  134. package/deps/rocksdb/rocksdb/db/version_builder.cc +11 -50
  135. package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
  136. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +2 -1
  137. package/deps/rocksdb/rocksdb/db/version_edit.cc +51 -2
  138. package/deps/rocksdb/rocksdb/db/version_edit.h +91 -29
  139. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -7
  140. package/deps/rocksdb/rocksdb/db/version_set.cc +211 -50
  141. package/deps/rocksdb/rocksdb/db/version_set.h +40 -3
  142. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
  143. package/deps/rocksdb/rocksdb/db/version_set_test.cc +294 -21
  144. package/deps/rocksdb/rocksdb/db/version_util.cc +96 -0
  145. package/deps/rocksdb/rocksdb/db/version_util.h +24 -0
  146. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +5 -5
  147. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +647 -31
  148. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +219 -1
  149. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +549 -12
  150. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -3
  151. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +1 -1
  152. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -0
  153. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +21 -4
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +32 -0
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +74 -22
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +9 -0
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -61
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -2
  159. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +76 -2
  160. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +92 -72
  161. package/deps/rocksdb/rocksdb/env/env.cc +1 -0
  162. package/deps/rocksdb/rocksdb/env/env_test.cc +365 -2
  163. package/deps/rocksdb/rocksdb/env/fs_posix.cc +31 -30
  164. package/deps/rocksdb/rocksdb/env/io_posix.cc +8 -11
  165. package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
  166. package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
  167. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -1
  168. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +108 -0
  169. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +32 -4
  170. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +4 -4
  171. package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
  172. package/deps/rocksdb/rocksdb/file/file_util.h +2 -1
  173. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +331 -12
  174. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +52 -35
  175. package/deps/rocksdb/rocksdb/folly.mk +22 -5
  176. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +1 -1
  177. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +100 -54
  178. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +67 -2
  179. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +149 -13
  180. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -12
  181. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +78 -97
  182. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +3 -3
  183. package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +2 -2
  184. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +5 -0
  185. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +17 -2
  186. package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +1 -1
  187. package/deps/rocksdb/rocksdb/include/rocksdb/io_dispatcher.h +358 -0
  188. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +13 -0
  189. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +43 -0
  190. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +20 -0
  191. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +63 -21
  192. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +10 -1
  193. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +1 -1
  194. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +2 -7
  195. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +13 -0
  196. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -14
  197. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +49 -9
  198. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +8 -0
  199. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +77 -6
  200. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +15 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +16 -10
  202. package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +5 -5
  203. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +2 -4
  204. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +106 -46
  205. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +1 -1
  206. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +14 -1
  207. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +5 -1
  208. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +2 -1
  209. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -9
  210. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  211. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -2
  212. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +2 -2
  213. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +226 -8
  214. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +490 -0
  215. package/deps/rocksdb/rocksdb/memtable/skiplist.h +3 -3
  216. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +11 -0
  217. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +4 -12
  218. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +5 -5
  219. package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +21 -4
  220. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +9 -3
  221. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +21 -2
  222. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +2 -2
  223. package/deps/rocksdb/rocksdb/options/cf_options.cc +21 -1
  224. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  225. package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -2
  226. package/deps/rocksdb/rocksdb/options/db_options.cc +26 -5
  227. package/deps/rocksdb/rocksdb/options/db_options.h +3 -1
  228. package/deps/rocksdb/rocksdb/options/options.cc +5 -1
  229. package/deps/rocksdb/rocksdb/options/options_helper.cc +7 -2
  230. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +109 -103
  231. package/deps/rocksdb/rocksdb/options/options_test.cc +14 -0
  232. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +15 -17
  233. package/deps/rocksdb/rocksdb/port/lang.h +4 -0
  234. package/deps/rocksdb/rocksdb/port/port_example.h +0 -23
  235. package/deps/rocksdb/rocksdb/port/stack_trace.cc +36 -0
  236. package/deps/rocksdb/rocksdb/port/stack_trace.h +9 -0
  237. package/deps/rocksdb/rocksdb/src.mk +12 -0
  238. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +1 -2
  239. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  240. package/deps/rocksdb/rocksdb/table/block_based/block.cc +571 -292
  241. package/deps/rocksdb/rocksdb/table/block_based/block.h +143 -53
  242. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +154 -90
  243. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +5 -1
  244. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +51 -14
  245. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +0 -2
  246. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +147 -734
  247. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +30 -233
  248. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +178 -108
  249. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +13 -0
  250. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +17 -4
  251. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -2
  252. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +70 -0
  253. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +168 -24
  254. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -9
  255. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +7 -4
  256. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +9 -2
  257. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +548 -169
  258. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
  259. package/deps/rocksdb/rocksdb/table/block_based/block_util.h +156 -0
  260. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +73 -30
  261. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +74 -7
  262. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +1 -1
  263. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +20 -14
  264. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +22 -12
  265. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +1 -1
  266. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.cc +332 -0
  267. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.h +133 -0
  268. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -2
  269. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
  270. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +3 -2
  271. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +4 -1
  272. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +0 -1
  273. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +126 -46
  274. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +31 -3
  275. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +1 -2
  276. package/deps/rocksdb/rocksdb/table/cleanable_test.cc +3 -1
  277. package/deps/rocksdb/rocksdb/table/external_table.cc +25 -4
  278. package/deps/rocksdb/rocksdb/table/format.cc +27 -15
  279. package/deps/rocksdb/rocksdb/table/format.h +41 -15
  280. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -0
  281. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +22 -12
  282. package/deps/rocksdb/rocksdb/table/meta_blocks.h +0 -1
  283. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +7 -21
  284. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
  285. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +88 -13
  286. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +53 -42
  287. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +3 -12
  288. package/deps/rocksdb/rocksdb/table/table_builder.h +0 -4
  289. package/deps/rocksdb/rocksdb/table/table_properties.cc +18 -0
  290. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -3
  291. package/deps/rocksdb/rocksdb/table/table_test.cc +848 -172
  292. package/deps/rocksdb/rocksdb/table/unique_id.cc +24 -20
  293. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +8 -8
  294. package/deps/rocksdb/rocksdb/test_util/sync_point.h +5 -4
  295. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -1
  296. package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -2
  297. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +2 -1
  298. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +238 -120
  299. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +2 -2
  300. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +2 -4
  301. package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +4 -8
  302. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +1 -1
  303. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +2 -3
  304. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +82 -20
  305. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +41 -47
  306. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -0
  307. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +5 -6
  308. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +1 -1
  309. package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +6 -5
  310. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +4 -4
  311. package/deps/rocksdb/rocksdb/tools/write_stress.cc +1 -3
  312. package/deps/rocksdb/rocksdb/util/atomic.h +30 -23
  313. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +6 -7
  314. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +3 -3
  315. package/deps/rocksdb/rocksdb/util/bit_fields.h +68 -46
  316. package/deps/rocksdb/rocksdb/util/bloom_impl.h +16 -16
  317. package/deps/rocksdb/rocksdb/util/coding.h +14 -27
  318. package/deps/rocksdb/rocksdb/util/compression.cc +365 -207
  319. package/deps/rocksdb/rocksdb/util/compression.h +16 -1298
  320. package/deps/rocksdb/rocksdb/util/compression_test.cc +347 -61
  321. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +8 -9
  322. package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +1 -1
  323. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +1 -1
  324. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +3 -3
  325. package/deps/rocksdb/rocksdb/util/filter_bench.cc +18 -18
  326. package/deps/rocksdb/rocksdb/util/gflags_compat.h +3 -3
  327. package/deps/rocksdb/rocksdb/util/hash_test.cc +19 -7
  328. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.cc +1099 -0
  329. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.h +36 -0
  330. package/deps/rocksdb/rocksdb/util/io_dispatcher_test.cc +1919 -0
  331. package/deps/rocksdb/rocksdb/util/math.h +3 -1
  332. package/deps/rocksdb/rocksdb/util/mutexlock.h +19 -19
  333. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +25 -25
  334. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +5 -7
  335. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -5
  336. package/deps/rocksdb/rocksdb/util/slice.cc +0 -10
  337. package/deps/rocksdb/rocksdb/util/slice_test.cc +35 -1
  338. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +5 -7
  339. package/deps/rocksdb/rocksdb/util/status.cc +3 -1
  340. package/deps/rocksdb/rocksdb/util/stop_watch.h +2 -0
  341. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -1
  342. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +123 -78
  343. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +12 -93
  344. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -4
  345. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +0 -21
  346. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +6 -48
  347. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +94 -307
  348. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +12 -58
  349. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +2 -8
  350. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +2 -3
  351. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +205 -811
  352. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +18 -9
  353. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +2 -7
  354. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +1 -9
  355. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +17 -11
  356. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +1 -1
  357. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +1 -1
  358. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
  359. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +68 -61
  360. package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -1
  361. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +105 -59
  362. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -7
  363. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs_test.cc +94 -0
  364. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +13 -17
  365. package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +16 -3
  366. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +25 -25
  367. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +40 -40
  368. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +2 -5
  369. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +17 -19
  370. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +2 -2
  371. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +2 -2
  372. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +1 -1
  373. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +2 -2
  374. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +4 -13
  375. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +3 -3
  376. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
  377. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_seqno_test.cc +431 -0
  378. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -2
  379. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +91 -0
  380. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.cc +562 -0
  381. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.h +615 -0
  382. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.cc +2575 -0
  383. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.h +685 -0
  384. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_db_test.cc +2843 -0
  385. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.cc +567 -0
  386. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.h +275 -0
  387. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_test.cc +5183 -0
  388. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +4 -3
  389. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  390. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
  391. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +3 -3
  392. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +93 -88
  393. package/deps/rocksdb/rocksdb.gyp +7 -0
  394. package/iterator.js +2 -2
  395. package/package.json +1 -1
  396. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  397. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  398. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +0 -43
  399. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +0 -55
@@ -170,7 +170,7 @@ class FilePicker {
170
170
  if (!search_ended_) {
171
171
  // Prefetch Level 0 table data to avoid cache miss if possible.
172
172
  for (unsigned int i = 0; i < (*level_files_brief_)[0].num_files; ++i) {
173
- auto* r = (*level_files_brief_)[0].files[i].fd.table_reader;
173
+ auto* r = (*level_files_brief_)[0].files[i].fd.pinned_reader.Get();
174
174
  if (r) {
175
175
  r->Prepare(ikey);
176
176
  }
@@ -395,7 +395,7 @@ class FilePickerMultiGet {
395
395
  // prefetching. This may not be necessary anymore once we implement
396
396
  // batching in those table readers
397
397
  for (unsigned int i = 0; i < (*level_files_brief_)[0].num_files; ++i) {
398
- auto* r = (*level_files_brief_)[0].files[i].fd.table_reader;
398
+ auto* r = (*level_files_brief_)[0].files[i].fd.pinned_reader.Get();
399
399
  if (r) {
400
400
  for (auto iter = range_.begin(); iter != range_.end(); ++iter) {
401
401
  r->Prepare(iter->ikey);
@@ -974,9 +974,8 @@ class LevelIterator final : public InternalIterator {
974
974
  TableCache* table_cache, const ReadOptions& read_options,
975
975
  const FileOptions& file_options, const InternalKeyComparator& icomparator,
976
976
  const LevelFilesBrief* flevel, const MutableCFOptions& mutable_cf_options,
977
- bool should_sample, HistogramImpl* file_read_hist,
978
- TableReaderCaller caller, bool skip_filters, int level,
979
- RangeDelAggregator* range_del_agg,
977
+ HistogramImpl* file_read_hist, TableReaderCaller caller,
978
+ bool skip_filters, int level, RangeDelAggregator* range_del_agg,
980
979
  const std::vector<AtomicCompactionUnitBoundary>* compaction_boundaries =
981
980
  nullptr,
982
981
  bool allow_unprepared_value = false,
@@ -1002,7 +1001,6 @@ class LevelIterator final : public InternalIterator {
1002
1001
  ? read_options.snapshot->GetSequenceNumber()
1003
1002
  : kMaxSequenceNumber),
1004
1003
  level_(level),
1005
- should_sample_(should_sample),
1006
1004
  skip_filters_(skip_filters),
1007
1005
  allow_unprepared_value_(allow_unprepared_value),
1008
1006
  is_next_read_sequential_(false),
@@ -1267,6 +1265,26 @@ class LevelIterator final : public InternalIterator {
1267
1265
  *read_options_.iterate_upper_bound, /*b_has_ts=*/false) >= 0;
1268
1266
  }
1269
1267
 
1268
+ template <bool IsSeek>
1269
+ void SampleRead() {
1270
+ bool sampled =
1271
+ IsSeek ? should_sample_file_read() : should_sample_file_read_next();
1272
+ if (!sampled) {
1273
+ return;
1274
+ }
1275
+
1276
+ if (file_index_ >= flevel_->num_files || !file_iter_.Valid()) {
1277
+ return;
1278
+ }
1279
+ const FileMetaData* meta = flevel_->files[file_index_].file_metadata;
1280
+ sample_file_read_inc(meta);
1281
+ ValueType type = ExtractValueType(file_iter_.key());
1282
+ if (type == kTypeDeletion || type == kTypeSingleDeletion ||
1283
+ type == kTypeDeletionWithTimestamp || type == kTypeMerge) {
1284
+ sample_collapsible_entry_file_read_inc(meta);
1285
+ }
1286
+ }
1287
+
1270
1288
  void ClearRangeTombstoneIter() {
1271
1289
  if (range_tombstone_iter_) {
1272
1290
  range_tombstone_iter_->reset();
@@ -1279,9 +1297,6 @@ class LevelIterator final : public InternalIterator {
1279
1297
  InternalIterator* NewFileIterator() {
1280
1298
  assert(file_index_ < flevel_->num_files);
1281
1299
  auto file_meta = flevel_->files[file_index_];
1282
- if (should_sample_) {
1283
- sample_file_read_inc(file_meta.file_metadata);
1284
- }
1285
1300
 
1286
1301
  const InternalKey* smallest_compaction_key = nullptr;
1287
1302
  const InternalKey* largest_compaction_key = nullptr;
@@ -1298,7 +1313,8 @@ class LevelIterator final : public InternalIterator {
1298
1313
  /*arena=*/nullptr, skip_filters_, level_,
1299
1314
  /*max_file_size_for_l0_meta_pin=*/0, smallest_compaction_key,
1300
1315
  largest_compaction_key, allow_unprepared_value_, &read_seq_,
1301
- range_tombstone_iter_);
1316
+ range_tombstone_iter_,
1317
+ /*maybe_pin_table_handle=*/true);
1302
1318
  }
1303
1319
 
1304
1320
  // Check if current file being fully within iterate_lower_bound.
@@ -1361,7 +1377,6 @@ class LevelIterator final : public InternalIterator {
1361
1377
  SequenceNumber read_seq_;
1362
1378
 
1363
1379
  int level_;
1364
- bool should_sample_;
1365
1380
  bool skip_filters_;
1366
1381
  bool allow_unprepared_value_;
1367
1382
  bool may_be_out_of_lower_bound_ = true;
@@ -1498,6 +1513,7 @@ void LevelIterator::Seek(const Slice& target) {
1498
1513
  }
1499
1514
  SkipEmptyFileForward();
1500
1515
  CheckMayBeOutOfLowerBound();
1516
+ SampleRead<true>();
1501
1517
  }
1502
1518
 
1503
1519
  void LevelIterator::SeekForPrev(const Slice& target) {
@@ -1533,6 +1549,7 @@ void LevelIterator::SeekForPrev(const Slice& target) {
1533
1549
  SkipEmptyFileBackward();
1534
1550
  }
1535
1551
  CheckMayBeOutOfLowerBound();
1552
+ SampleRead<true>();
1536
1553
  }
1537
1554
 
1538
1555
  void LevelIterator::SeekToFirst() {
@@ -1549,6 +1566,7 @@ void LevelIterator::SeekToFirst() {
1549
1566
  }
1550
1567
  SkipEmptyFileForward();
1551
1568
  CheckMayBeOutOfLowerBound();
1569
+ SampleRead<true>();
1552
1570
  }
1553
1571
 
1554
1572
  void LevelIterator::SeekToLast() {
@@ -1563,6 +1581,7 @@ void LevelIterator::SeekToLast() {
1563
1581
  }
1564
1582
  SkipEmptyFileBackward();
1565
1583
  CheckMayBeOutOfLowerBound();
1584
+ SampleRead<true>();
1566
1585
  }
1567
1586
 
1568
1587
  void LevelIterator::Next() {
@@ -1577,6 +1596,7 @@ void LevelIterator::Next() {
1577
1596
  }
1578
1597
  }
1579
1598
  SkipEmptyFileForward();
1599
+ SampleRead<false>();
1580
1600
  }
1581
1601
 
1582
1602
  bool LevelIterator::NextAndGetResult(IterateResult* result) {
@@ -1610,6 +1630,7 @@ bool LevelIterator::NextAndGetResult(IterateResult* result) {
1610
1630
  }
1611
1631
  }
1612
1632
  }
1633
+ SampleRead<false>();
1613
1634
  return is_valid;
1614
1635
  }
1615
1636
 
@@ -1624,6 +1645,7 @@ void LevelIterator::Prev() {
1624
1645
  }
1625
1646
  }
1626
1647
  SkipEmptyFileBackward();
1648
+ SampleRead<false>();
1627
1649
  }
1628
1650
 
1629
1651
  bool LevelIterator::SkipEmptyFileForward() {
@@ -1820,8 +1842,10 @@ Status Version::GetTableProperties(const ReadOptions& read_options,
1820
1842
  file_name = TableFileName(ioptions.cf_paths, file_meta->fd.GetNumber(),
1821
1843
  file_meta->fd.GetPathId());
1822
1844
  }
1823
- s = ioptions.fs->NewRandomAccessFile(file_name, file_options_, &file,
1824
- nullptr);
1845
+ FileOptions fopts = file_options_;
1846
+ fopts.file_checksum = file_meta->file_checksum;
1847
+ fopts.file_checksum_func_name = file_meta->file_checksum_func_name;
1848
+ s = ioptions.fs->NewRandomAccessFile(file_name, fopts, &file, nullptr);
1825
1849
  if (!s.ok()) {
1826
1850
  return s;
1827
1851
  }
@@ -2219,7 +2243,7 @@ void Version::GetCreationTimeOfOldestFile(uint64_t* creation_time) {
2219
2243
  uint64_t oldest_time = std::numeric_limits<uint64_t>::max();
2220
2244
  for (int level = 0; level < storage_info_.num_non_empty_levels_; level++) {
2221
2245
  for (FileMetaData* meta : storage_info_.LevelFiles(level)) {
2222
- assert(meta->fd.table_reader != nullptr);
2246
+ assert(meta->fd.pinned_reader.Get() != nullptr);
2223
2247
  uint64_t file_creation_time = meta->TryGetFileCreationTime();
2224
2248
  if (file_creation_time == kUnknownFileCreationTime) {
2225
2249
  *creation_time = 0;
@@ -2242,8 +2266,7 @@ InternalIterator* Version::TEST_GetLevelIterator(
2242
2266
  auto level_iter = new (mem) LevelIterator(
2243
2267
  cfd_->table_cache(), read_options, file_options_,
2244
2268
  cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level),
2245
- mutable_cf_options_, should_sample_file_read(),
2246
- cfd_->internal_stats()->GetFileReadHist(level),
2269
+ mutable_cf_options_, cfd_->internal_stats()->GetFileReadHist(level),
2247
2270
  TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
2248
2271
  nullptr /* range_del_agg */, nullptr /* compaction_boundaries */,
2249
2272
  allow_unprepared_value, &tombstone_iter_ptr, db_statistics_, clock_);
@@ -2339,8 +2362,6 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
2339
2362
  return;
2340
2363
  }
2341
2364
 
2342
- bool should_sample = should_sample_file_read();
2343
-
2344
2365
  auto* arena = merge_iter_builder->GetArena();
2345
2366
  if (level == 0) {
2346
2367
  // Merge all level zero files together since they may overlap
@@ -2355,7 +2376,8 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
2355
2376
  /*skip_filters=*/false, /*level=*/0, max_file_size_for_l0_meta_pin_,
2356
2377
  /*smallest_compaction_key=*/nullptr,
2357
2378
  /*largest_compaction_key=*/nullptr, allow_unprepared_value,
2358
- /*range_del_read_seqno=*/nullptr, &tombstone_iter);
2379
+ /*range_del_read_seqno=*/nullptr, &tombstone_iter,
2380
+ /*maybe_pin_table_handle=*/true);
2359
2381
  if (read_options.ignore_range_deletions) {
2360
2382
  merge_iter_builder->AddIterator(table_iter);
2361
2383
  } else {
@@ -2363,11 +2385,10 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
2363
2385
  table_iter, std::move(tombstone_iter));
2364
2386
  }
2365
2387
  }
2366
- if (should_sample) {
2388
+ if (should_sample_file_read()) {
2367
2389
  // Count ones for every L0 files. This is done per iterator creation
2368
- // rather than Seek(), while files in other levels are recored per seek.
2369
- // If users execute one range query per iterator, there may be some
2370
- // discrepancy here.
2390
+ // rather than Seek(), while files in other levels are sampled on
2391
+ // seek/next/prev.
2371
2392
  for (FileMetaData* meta : storage_info_.LevelFiles(0)) {
2372
2393
  sample_file_read_inc(meta);
2373
2394
  }
@@ -2381,8 +2402,7 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
2381
2402
  auto level_iter = new (mem) LevelIterator(
2382
2403
  cfd_->table_cache(), read_options, soptions,
2383
2404
  cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level),
2384
- mutable_cf_options_, should_sample_file_read(),
2385
- cfd_->internal_stats()->GetFileReadHist(level),
2405
+ mutable_cf_options_, cfd_->internal_stats()->GetFileReadHist(level),
2386
2406
  TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
2387
2407
  /*range_del_agg=*/nullptr,
2388
2408
  /*compaction_boundaries=*/nullptr, allow_unprepared_value,
@@ -2440,8 +2460,7 @@ Status Version::OverlapWithLevelIterator(const ReadOptions& read_options,
2440
2460
  ScopedArenaPtr<InternalIterator> iter(new (mem) LevelIterator(
2441
2461
  cfd_->table_cache(), read_options, file_options,
2442
2462
  cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level),
2443
- mutable_cf_options_, should_sample_file_read(),
2444
- cfd_->internal_stats()->GetFileReadHist(level),
2463
+ mutable_cf_options_, cfd_->internal_stats()->GetFileReadHist(level),
2445
2464
  TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
2446
2465
  &range_del_agg, nullptr, false, nullptr, db_statistics_, clock_));
2447
2466
  status = OverlapWithIterator(ucmp, smallest_user_key, largest_user_key,
@@ -2782,9 +2801,15 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
2782
2801
  switch (get_context.State()) {
2783
2802
  case GetContext::kNotFound:
2784
2803
  // Keep searching in other files
2804
+ if (get_context.sample()) {
2805
+ sample_collapsible_entry_file_read_inc(f->file_metadata);
2806
+ }
2785
2807
  break;
2786
2808
  case GetContext::kMerge:
2787
2809
  // TODO: update per-level perfcontext user_key_return_count for kMerge
2810
+ if (get_context.sample()) {
2811
+ sample_collapsible_entry_file_read_inc(f->file_metadata);
2812
+ }
2788
2813
  break;
2789
2814
  case GetContext::kFound:
2790
2815
  if (fp.GetHitFileLevel() == 0) {
@@ -2833,6 +2858,9 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
2833
2858
  case GetContext::kDeleted:
2834
2859
  // Use empty error message for speed
2835
2860
  *status = Status::NotFound();
2861
+ if (get_context.sample()) {
2862
+ sample_collapsible_entry_file_read_inc(f->file_metadata);
2863
+ }
2836
2864
  return;
2837
2865
  case GetContext::kCorrupt:
2838
2866
  *status = Status::Corruption("corrupted key for ", user_key);
@@ -3444,7 +3472,7 @@ bool Version::MaybeInitializeFileMetaData(const ReadOptions& read_options,
3444
3472
  // Ensure new invariants on old files
3445
3473
  file_meta->num_deletions =
3446
3474
  std::max(tp->num_deletions, tp->num_range_deletions);
3447
- file_meta->num_entries = std::max(tp->num_entries, tp->num_deletions);
3475
+ file_meta->num_entries = std::max(tp->num_entries, file_meta->num_deletions);
3448
3476
  return true;
3449
3477
  }
3450
3478
 
@@ -3737,7 +3765,8 @@ bool ShouldChangeFileTemperature(const ImmutableOptions& ioptions,
3737
3765
 
3738
3766
  void VersionStorageInfo::ComputeCompactionScore(
3739
3767
  const ImmutableOptions& immutable_options,
3740
- const MutableCFOptions& mutable_cf_options) {
3768
+ const MutableCFOptions& mutable_cf_options,
3769
+ const std::string& full_history_ts_low) {
3741
3770
  double total_downcompact_bytes = 0.0;
3742
3771
  // Historically, score is defined as actual bytes in a level divided by
3743
3772
  // the level's target size, and 1.0 is the threshold for triggering
@@ -3791,15 +3820,20 @@ void VersionStorageInfo::ComputeCompactionScore(
3791
3820
  }
3792
3821
 
3793
3822
  if (compaction_style_ == kCompactionStyleFIFO) {
3794
- auto max_table_files_size =
3795
- mutable_cf_options.compaction_options_fifo.max_table_files_size;
3796
- if (max_table_files_size == 0) {
3823
+ const auto& fifo_opts = mutable_cf_options.compaction_options_fifo;
3824
+ uint64_t effective_size = total_size;
3825
+ uint64_t effective_max = fifo_opts.max_table_files_size;
3826
+ if (fifo_opts.max_data_files_size > 0) {
3827
+ // Blob-aware: include blob file sizes in the total
3828
+ effective_size += GetBlobStats().total_file_size;
3829
+ effective_max = fifo_opts.max_data_files_size;
3830
+ }
3831
+ if (effective_max == 0) {
3797
3832
  // avoid divide 0
3798
- max_table_files_size = 1;
3833
+ effective_max = 1;
3799
3834
  }
3800
- score = static_cast<double>(total_size) / max_table_files_size;
3801
- if (score < 1 &&
3802
- mutable_cf_options.compaction_options_fifo.allow_compaction) {
3835
+ score = static_cast<double>(effective_size) / effective_max;
3836
+ if (score < 1 && fifo_opts.allow_compaction) {
3803
3837
  score = std::max(
3804
3838
  static_cast<double>(num_sorted_runs) /
3805
3839
  mutable_cf_options.level0_file_num_compaction_trigger,
@@ -3936,7 +3970,8 @@ void VersionStorageInfo::ComputeCompactionScore(
3936
3970
  ComputeFilesMarkedForCompaction(max_output_level);
3937
3971
  ComputeBottommostFilesMarkedForCompaction(
3938
3972
  immutable_options.cf_allow_ingest_behind ||
3939
- immutable_options.allow_ingest_behind);
3973
+ immutable_options.allow_ingest_behind,
3974
+ immutable_options.user_comparator, full_history_ts_low);
3940
3975
  ComputeExpiredTtlFiles(immutable_options, mutable_cf_options.ttl);
3941
3976
  ComputeFilesMarkedForPeriodicCompaction(
3942
3977
  immutable_options, mutable_cf_options.periodic_compaction_seconds,
@@ -4527,17 +4562,20 @@ void VersionStorageInfo::GenerateFileLocationIndex() {
4527
4562
  }
4528
4563
  }
4529
4564
 
4530
- void VersionStorageInfo::UpdateOldestSnapshot(SequenceNumber seqnum,
4531
- bool allow_ingest_behind) {
4565
+ void VersionStorageInfo::UpdateOldestSnapshot(
4566
+ SequenceNumber seqnum, bool allow_ingest_behind, const Comparator* ucmp,
4567
+ const std::string& full_history_ts_low) {
4532
4568
  assert(seqnum >= oldest_snapshot_seqnum_);
4533
4569
  oldest_snapshot_seqnum_ = seqnum;
4534
4570
  if (oldest_snapshot_seqnum_ > bottommost_files_mark_threshold_) {
4535
- ComputeBottommostFilesMarkedForCompaction(allow_ingest_behind);
4571
+ ComputeBottommostFilesMarkedForCompaction(allow_ingest_behind, ucmp,
4572
+ full_history_ts_low);
4536
4573
  }
4537
4574
  }
4538
4575
 
4539
4576
  void VersionStorageInfo::ComputeBottommostFilesMarkedForCompaction(
4540
- bool allow_ingest_behind) {
4577
+ bool allow_ingest_behind, const Comparator* ucmp,
4578
+ const std::string& full_history_ts_low) {
4541
4579
  bottommost_files_marked_for_compaction_.clear();
4542
4580
  bottommost_files_mark_threshold_ = kMaxSequenceNumber;
4543
4581
  if (allow_ingest_behind) {
@@ -4558,12 +4596,39 @@ void VersionStorageInfo::ComputeBottommostFilesMarkedForCompaction(
4558
4596
  current_time - static_cast<int64_t>(bottommost_file_compaction_delay_);
4559
4597
  }
4560
4598
 
4599
+ // For UDT, we need to check if the file's max timestamp is below
4600
+ // full_history_ts_low. If not, the compaction won't be able to collapse the
4601
+ // timestamp to clean up the tombstone, so marking the file would be futile
4602
+ // and could cause an infinite compaction loop.
4603
+ const bool has_udt = ucmp && ucmp->timestamp_size() > 0;
4604
+
4561
4605
  for (auto& level_and_file : bottommost_files_) {
4562
4606
  if (!level_and_file.second->being_compacted &&
4563
4607
  level_and_file.second->fd.largest_seqno != 0) {
4564
4608
  // largest_seqno might be nonzero due to containing the final key in an
4565
4609
  // earlier compaction, whose seqnum we didn't zero out.
4566
4610
  if (level_and_file.second->fd.largest_seqno < oldest_snapshot_seqnum_) {
4611
+ if (has_udt) {
4612
+ const std::string& max_ts = level_and_file.second->max_timestamp;
4613
+ // If max_timestamp is empty, the file could come from very old
4614
+ // version which does not have timestamp. In that case, we should pick
4615
+ // the file for compaction. After compaction, the file will have
4616
+ // max_timestamp set properly.
4617
+ if (!max_ts.empty()) {
4618
+ // If full_history_ts_low is empty, it means it was never set, which
4619
+ // means its value is 0. Therefore, it would be always smaller than
4620
+ // max_timestamp
4621
+ if (full_history_ts_low.empty()) {
4622
+ continue;
4623
+ }
4624
+ // If max timestamp >= full_history_ts_low, skip this file
4625
+ if (ucmp->CompareTimestamp(Slice(max_ts), full_history_ts_low) >=
4626
+ 0) {
4627
+ continue;
4628
+ }
4629
+ }
4630
+ }
4631
+
4567
4632
  if (!needs_delay) {
4568
4633
  bottommost_files_marked_for_compaction_.push_back(level_and_file);
4569
4634
  } else if (creation_time_ub > 0) {
@@ -5542,6 +5607,96 @@ Status VersionSet::Close(FSDirectory* db_dir, InstrumentedMutex* mu) {
5542
5607
  s = LogAndApply(cfd, ReadOptions(), WriteOptions(), &edit, mu, db_dir);
5543
5608
  }
5544
5609
 
5610
+ // Content validation: read back the manifest and verify CRC + decode.
5611
+ // Loop up to 2 checks with 1 rewrite attempt in between, so we also verify
5612
+ // the rewritten manifest is healthy.
5613
+ if (s.ok() && verify_manifest_content_on_close_) {
5614
+ TEST_SYNC_POINT("VersionSet::Close:BeforeContentValidation");
5615
+ constexpr int kMaxContentChecks = 2;
5616
+ for (int content_check = 0; s.ok() && content_check < kMaxContentChecks;
5617
+ ++content_check) {
5618
+ // Re-read the manifest file name in case it was rotated by a rewrite
5619
+ std::string content_manifest_name =
5620
+ DescriptorFileName(dbname_, manifest_file_number_);
5621
+ std::unique_ptr<FSSequentialFile> manifest_file;
5622
+ IOStatus content_io_s = fs_->NewSequentialFile(
5623
+ content_manifest_name, fs_->OptimizeForManifestRead(file_options_),
5624
+ &manifest_file, nullptr);
5625
+ if (!content_io_s.ok()) {
5626
+ // Surface I/O errors to the caller — users who call DB::Close() and
5627
+ // check the status should know about filesystem problems.
5628
+ s = content_io_s;
5629
+ ROCKS_LOG_ERROR(db_options_->info_log,
5630
+ "MANIFEST content verification on Close: "
5631
+ "could not open %s for reading: %s\n",
5632
+ content_manifest_name.c_str(),
5633
+ content_io_s.ToString().c_str());
5634
+ break;
5635
+ }
5636
+ std::unique_ptr<SequentialFileReader> manifest_file_reader(
5637
+ new SequentialFileReader(std::move(manifest_file),
5638
+ content_manifest_name,
5639
+ db_options_->log_readahead_size, io_tracer_,
5640
+ db_options_->listeners));
5641
+ LogReporter reporter;
5642
+ Status log_read_status;
5643
+ reporter.status = &log_read_status;
5644
+ log::Reader reader(nullptr, std::move(manifest_file_reader), &reporter,
5645
+ /*checksum=*/true, /*log_num=*/0);
5646
+ Slice record;
5647
+ std::string scratch;
5648
+ bool content_corrupt = false;
5649
+ while (reader.ReadRecord(&record, &scratch,
5650
+ WALRecoveryMode::kAbsoluteConsistency)) {
5651
+ VersionEdit edit;
5652
+ Status decode_s = edit.DecodeFrom(record);
5653
+ if (!decode_s.ok()) {
5654
+ content_corrupt = true;
5655
+ break;
5656
+ }
5657
+ }
5658
+ if (!content_corrupt && !log_read_status.ok()) {
5659
+ content_corrupt = true;
5660
+ }
5661
+ if (!content_corrupt) {
5662
+ // Manifest is healthy, no need to check again
5663
+ break;
5664
+ }
5665
+ IOStatus corrupt_io_s =
5666
+ IOStatus::Corruption("MANIFEST content validation failed");
5667
+ IOErrorInfo io_error_info(corrupt_io_s, FileOperationType::kVerify,
5668
+ content_manifest_name, /*length=*/0,
5669
+ /*offset=*/0);
5670
+ for (auto& listener : db_options_->listeners) {
5671
+ listener->OnIOError(io_error_info);
5672
+ }
5673
+ corrupt_io_s.PermitUncheckedError();
5674
+ io_error_info.io_status.PermitUncheckedError();
5675
+ if (content_check == 0) {
5676
+ // First check failed — rewrite and verify again
5677
+ ROCKS_LOG_ERROR(db_options_->info_log,
5678
+ "MANIFEST content verification on Close failed, "
5679
+ "filename %s, rewriting manifest\n",
5680
+ content_manifest_name.c_str());
5681
+ ColumnFamilyData* cfd = GetColumnFamilySet()->GetDefault();
5682
+ VersionEdit recovery_edit;
5683
+ assert(cfd);
5684
+ s = LogAndApply(cfd, ReadOptions(), WriteOptions(), &recovery_edit, mu,
5685
+ db_dir);
5686
+ } else {
5687
+ // Rewritten manifest is also corrupt — likely a recurring filesystem
5688
+ // issue. Surface it so DB::Close() callers can detect the problem.
5689
+ ROCKS_LOG_ERROR(db_options_->info_log,
5690
+ "MANIFEST content verification on Close failed again "
5691
+ "after rewrite, filename %s\n",
5692
+ content_manifest_name.c_str());
5693
+ s = Status::Corruption(
5694
+ "MANIFEST content verification failed after rewrite: " +
5695
+ content_manifest_name);
5696
+ }
5697
+ }
5698
+ }
5699
+
5545
5700
  closed_ = true;
5546
5701
  return s;
5547
5702
  }
@@ -5558,9 +5713,10 @@ VersionSet::~VersionSet() {
5558
5713
  // Using uncache_aggressiveness=0 overrides any previous marking to
5559
5714
  // attempt to uncache the file's blocks (which after cleaning up
5560
5715
  // column families could cause use-after-free)
5561
- TableCache::ReleaseObsolete(table_cache_, file.metadata->fd.GetNumber(),
5562
- file.metadata->table_reader_handle,
5563
- /*uncache_aggressiveness=*/0);
5716
+ TableCache::ReleaseObsolete(
5717
+ table_cache_, file.metadata->fd.GetNumber(),
5718
+ file.metadata->fd.pinned_reader.GetCacheHandle(),
5719
+ /*uncache_aggressiveness=*/0);
5564
5720
  file.DeleteMetadata();
5565
5721
  }
5566
5722
  obsolete_files_.clear();
@@ -5624,6 +5780,8 @@ void VersionSet::UpdatedMutableDbOptions(
5624
5780
  max_manifest_space_amp_pct_ = static_cast<unsigned>(
5625
5781
  std::max(updated_options.max_manifest_space_amp_pct, 0));
5626
5782
  manifest_preallocation_size_ = updated_options.manifest_preallocation_size;
5783
+ verify_manifest_content_on_close_ =
5784
+ updated_options.verify_manifest_content_on_close;
5627
5785
  TuneMaxManifestFileSize();
5628
5786
  }
5629
5787
 
@@ -5639,7 +5797,8 @@ void VersionSet::AppendVersion(ColumnFamilyData* column_family_data,
5639
5797
  // compute new compaction score
5640
5798
  v->storage_info()->ComputeCompactionScore(
5641
5799
  column_family_data->ioptions(),
5642
- column_family_data->GetLatestMutableCFOptions());
5800
+ column_family_data->GetLatestMutableCFOptions(),
5801
+ column_family_data->GetFullHistoryTsLow());
5643
5802
 
5644
5803
  // Mark v finalized
5645
5804
  v->storage_info_.SetFinalized();
@@ -6497,7 +6656,8 @@ Status VersionSet::Recover(
6497
6656
  read_only, column_families, const_cast<VersionSet*>(this),
6498
6657
  /*track_found_and_missing_files=*/false, no_error_if_files_missing,
6499
6658
  io_tracer_, read_options, /*allow_incomplete_valid_version=*/false,
6500
- EpochNumberRequirement::kMightMissing);
6659
+ EpochNumberRequirement::kMightMissing,
6660
+ /*skip_load_table_files=*/db_options_->open_files_async);
6501
6661
  handler.Iterate(reader, &log_read_status);
6502
6662
  s = handler.status();
6503
6663
  if (s.ok()) {
@@ -7102,7 +7262,6 @@ Status VersionSet::WriteCurrentStateToManifest(
7102
7262
 
7103
7263
  for (const auto& f : level_files) {
7104
7264
  assert(f);
7105
-
7106
7265
  edit.AddFile(level, f->fd.GetNumber(), f->fd.GetPathId(),
7107
7266
  f->fd.GetFileSize(), f->smallest, f->largest,
7108
7267
  f->fd.smallest_seqno, f->fd.largest_seqno,
@@ -7111,7 +7270,8 @@ Status VersionSet::WriteCurrentStateToManifest(
7111
7270
  f->file_creation_time, f->epoch_number, f->file_checksum,
7112
7271
  f->file_checksum_func_name, f->unique_id,
7113
7272
  f->compensated_range_deletion_size, f->tail_size,
7114
- f->user_defined_timestamps_persisted);
7273
+ f->user_defined_timestamps_persisted, f->min_timestamp,
7274
+ f->max_timestamp);
7115
7275
  }
7116
7276
  }
7117
7277
 
@@ -7550,7 +7710,6 @@ InternalIterator* VersionSet::MakeInputIterator(
7550
7710
  list[num++] = new LevelIterator(
7551
7711
  cfd->table_cache(), read_options, file_options_compactions,
7552
7712
  cfd->internal_comparator(), flevel, c->mutable_cf_options(),
7553
- /*should_sample=*/false,
7554
7713
  /*no per level latency histogram=*/nullptr,
7555
7714
  TableReaderCaller::kCompaction, /*skip_filters=*/false,
7556
7715
  /*level=*/static_cast<int>(c->level(which)), range_del_agg,
@@ -7816,10 +7975,12 @@ Status VersionSet::VerifyFileMetadata(const ReadOptions& read_options,
7816
7975
  InternalStats* internal_stats = cfd->internal_stats();
7817
7976
 
7818
7977
  TableCache::TypedHandle* handle = nullptr;
7978
+ TableReader* table_reader = nullptr;
7819
7979
  FileMetaData meta_copy = meta;
7820
7980
  status = table_cache->FindTable(
7821
7981
  read_options, file_opts, *icmp, meta_copy, &handle, cf_opts,
7822
- /*no_io=*/false, internal_stats->GetFileReadHist(level), false, level,
7982
+ &table_reader, /*no_io=*/false, internal_stats->GetFileReadHist(level),
7983
+ false, level,
7823
7984
  /*prefetch_index_and_filter_in_cache*/ false, max_sz_for_l0_meta_pin,
7824
7985
  meta_copy.temperature);
7825
7986
  if (handle) {
@@ -200,7 +200,8 @@ class VersionStorageInfo {
200
200
  // REQUIRES: db_mutex held!!
201
201
  // TODO find a better way to pass compaction_options_fifo.
202
202
  void ComputeCompactionScore(const ImmutableOptions& immutable_options,
203
- const MutableCFOptions& mutable_cf_options);
203
+ const MutableCFOptions& mutable_cf_options,
204
+ const std::string& full_history_ts_low);
204
205
 
205
206
  // Estimate est_comp_needed_bytes_
206
207
  void EstimateCompactionBytesNeeded(
@@ -230,8 +231,15 @@ class VersionStorageInfo {
230
231
  // oldest snapshot changes as that is when bottom-level files can become
231
232
  // eligible for compaction.
232
233
  //
234
+ // For columns with User Defined Timestamps (UDT), also checks that the
235
+ // file's largest timestamp is below full_history_ts_low before marking,
236
+ // since compaction can only collapse timestamp when it is below this
237
+ // threshold.
238
+ //
233
239
  // REQUIRES: DB mutex held
234
- void ComputeBottommostFilesMarkedForCompaction(bool allow_ingest_behind);
240
+ void ComputeBottommostFilesMarkedForCompaction(
241
+ bool allow_ingest_behind, const Comparator* ucmp,
242
+ const std::string& full_history_ts_low);
235
243
 
236
244
  // This computes files_marked_for_forced_blob_gc_ and is called by
237
245
  // ComputeCompactionScore()
@@ -248,7 +256,8 @@ class VersionStorageInfo {
248
256
  // files marked for compaction.
249
257
  // REQUIRES: DB mutex held
250
258
  void UpdateOldestSnapshot(SequenceNumber oldest_snapshot_seqnum,
251
- bool allow_ingest_behind);
259
+ bool allow_ingest_behind, const Comparator* ucmp,
260
+ const std::string& full_history_ts_low);
252
261
 
253
262
  int MaxInputLevel() const;
254
263
  int MaxOutputLevel(bool allow_ingest_behind) const;
@@ -1425,6 +1434,29 @@ class VersionSet {
1425
1434
  return last_allocated_sequence_.fetch_add(s, std::memory_order_seq_cst);
1426
1435
  }
1427
1436
 
1437
+ // Sync last_sequence_ with last_allocated_sequence_. This should be called
1438
+ // during error recovery to ensure that any sequence numbers that were
1439
+ // allocated (written to WAL) but not yet published are accounted for when
1440
+ // creating new memtables/WALs. This prevents the "sequence number going
1441
+ // backwards" corruption on subsequent recovery.
1442
+ //
1443
+ // This is necessary because with two_write_queues=true, writes allocate
1444
+ // sequence numbers via FetchAddLastAllocatedSequence() before the write
1445
+ // is complete, but only publish via SetLastSequence() after success.
1446
+ // If an error occurs and recovery creates new memtables, SwitchMemtable
1447
+ // uses LastSequence() which may be lower than already-allocated sequences.
1448
+ //
1449
+ // REQUIRED: DB mutex is held and no concurrent writers are active (i.e.,
1450
+ // after WaitForBackgroundWork() in ResumeImpl).
1451
+ void SyncLastSequenceWithAllocated() {
1452
+ uint64_t alloc_seq =
1453
+ last_allocated_sequence_.load(std::memory_order_seq_cst);
1454
+ uint64_t last_seq = last_sequence_.load(std::memory_order_acquire);
1455
+ if (alloc_seq > last_seq) {
1456
+ last_sequence_.store(alloc_seq, std::memory_order_release);
1457
+ }
1458
+ }
1459
+
1428
1460
  // Mark the specified file number as used.
1429
1461
  // REQUIRED: this is only called during single-threaded recovery or repair.
1430
1462
  void MarkFileNumberUsed(uint64_t number);
@@ -1706,6 +1738,9 @@ class VersionSet {
1706
1738
  // The last sequence number of data committed to the descriptor (manifest
1707
1739
  // file).
1708
1740
  SequenceNumber descriptor_last_sequence_ = 0;
1741
+ // See write_prepared_txn.h for a more detailed description of how Write
1742
+ // Prepared transactions work, with concrete examples.
1743
+ //
1709
1744
  // The last seq that is already allocated. It is applicable only when we have
1710
1745
  // two write queues. In that case seq might or might not have appeared in
1711
1746
  // memtable but it is expected to appear in the WAL.
@@ -1744,6 +1779,8 @@ class VersionSet {
1744
1779
  unsigned max_manifest_space_amp_pct_;
1745
1780
  // Saved copy from (Mutable)DBOptions
1746
1781
  size_t manifest_preallocation_size_;
1782
+ // Saved copy from (Mutable)DBOptions
1783
+ bool verify_manifest_content_on_close_;
1747
1784
 
1748
1785
  // Obsolete files, or during DB shutdown any files not referenced by what's
1749
1786
  // left of the in-memory LSM state.
@@ -58,6 +58,11 @@ DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST)
58
58
 
59
59
  if (get_context.sample()) {
60
60
  sample_file_read_inc(f->file_metadata);
61
+ if (get_context.State() == GetContext::kNotFound ||
62
+ get_context.State() == GetContext::kMerge ||
63
+ get_context.State() == GetContext::kDeleted) {
64
+ sample_collapsible_entry_file_read_inc(f->file_metadata);
65
+ }
61
66
  }
62
67
  batch_size++;
63
68
  num_index_read += get_context.get_context_stats_.num_index_read;