@nxtedition/rocksdb 15.4.1 → 16.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (401):
  1. package/binding.cc +70 -23
  2. package/deps/rocksdb/rocksdb/.clang-tidy +86 -0
  3. package/deps/rocksdb/rocksdb/BUCK +42 -0
  4. package/deps/rocksdb/rocksdb/CMakeLists.txt +11 -0
  5. package/deps/rocksdb/rocksdb/Makefile +59 -32
  6. package/deps/rocksdb/rocksdb/cache/cache.cc +0 -5
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +9 -9
  8. package/deps/rocksdb/rocksdb/cache/cache_key.cc +3 -3
  9. package/deps/rocksdb/rocksdb/cache/cache_key.h +5 -5
  10. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +16 -16
  11. package/deps/rocksdb/rocksdb/cache/cache_test.cc +1 -1
  12. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +258 -294
  13. package/deps/rocksdb/rocksdb/cache/clock_cache.h +98 -49
  14. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -5
  15. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +2 -3
  16. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +18 -18
  17. package/deps/rocksdb/rocksdb/crash_test.mk +5 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -22
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +6 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +14 -16
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +38 -26
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +101 -18
  24. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +12 -0
  25. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +6 -9
  26. package/deps/rocksdb/rocksdb/db/builder.cc +23 -0
  27. package/deps/rocksdb/rocksdb/db/builder.h +7 -0
  28. package/deps/rocksdb/rocksdb/db/c.cc +373 -57
  29. package/deps/rocksdb/rocksdb/db/c_test.c +101 -1
  30. package/deps/rocksdb/rocksdb/db/column_family.cc +31 -3
  31. package/deps/rocksdb/rocksdb/db/column_family_test.cc +10 -13
  32. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +35 -48
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +13 -5
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +201 -39
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -10
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +7 -7
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -455
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +19 -0
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +72 -9
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -10
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +405 -83
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +25 -1
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -10
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1410 -106
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -5
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +2 -1
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +19 -10
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +505 -45
  51. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
  52. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +9 -1
  53. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +4 -4
  54. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +7 -9
  55. package/deps/rocksdb/rocksdb/db/convenience.cc +4 -4
  56. package/deps/rocksdb/rocksdb/db/convenience_impl.h +2 -1
  57. package/deps/rocksdb/rocksdb/db/corruption_test.cc +60 -88
  58. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +10 -12
  59. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +471 -40
  60. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +116 -2
  61. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +5 -15
  62. package/deps/rocksdb/rocksdb/db/db_compaction_abort_test.cc +993 -0
  63. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +329 -29
  64. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +155 -13
  65. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +54 -31
  66. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -0
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +232 -70
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +57 -9
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +224 -31
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +4 -2
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +164 -8
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -0
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +5 -0
  77. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +47 -35
  78. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +22 -9
  79. package/deps/rocksdb/rocksdb/db/db_iter.cc +9 -0
  80. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +371 -6
  81. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -5
  82. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +22 -23
  83. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +0 -2
  84. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +4 -4
  85. package/deps/rocksdb/rocksdb/db/db_options_test.cc +40 -0
  86. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +32 -13
  87. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1 -1
  88. package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +4 -4
  89. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +68 -15
  90. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1 -1
  91. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +2 -3
  92. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -21
  93. package/deps/rocksdb/rocksdb/db/db_test.cc +644 -128
  94. package/deps/rocksdb/rocksdb/db/db_test2.cc +198 -81
  95. package/deps/rocksdb/rocksdb/db/db_test_util.cc +35 -10
  96. package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -2
  97. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +36 -32
  98. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +11 -7
  99. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +499 -0
  100. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +284 -20
  101. package/deps/rocksdb/rocksdb/db/db_write_test.cc +3 -3
  102. package/deps/rocksdb/rocksdb/db/dbformat.h +0 -5
  103. package/deps/rocksdb/rocksdb/db/error_handler.cc +24 -0
  104. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +12 -14
  105. package/deps/rocksdb/rocksdb/db/experimental.cc +13 -10
  106. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -1
  107. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +22 -3
  108. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +21 -15
  109. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +4 -6
  110. package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -3
  111. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +5 -6
  112. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +4 -2
  113. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +17 -17
  114. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -0
  115. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  116. package/deps/rocksdb/rocksdb/db/listener_test.cc +154 -27
  117. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -6
  118. package/deps/rocksdb/rocksdb/db/memtable.cc +197 -51
  119. package/deps/rocksdb/rocksdb/db/memtable.h +6 -0
  120. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +3 -4
  121. package/deps/rocksdb/rocksdb/db/merge_test.cc +37 -35
  122. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +2 -1
  123. package/deps/rocksdb/rocksdb/db/options_file_test.cc +4 -4
  124. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +9 -11
  125. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +10 -1
  126. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +292 -15
  127. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +10 -17
  128. package/deps/rocksdb/rocksdb/db/prefix_test.cc +6 -8
  129. package/deps/rocksdb/rocksdb/db/repair.cc +10 -10
  130. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -5
  131. package/deps/rocksdb/rocksdb/db/table_cache.cc +142 -135
  132. package/deps/rocksdb/rocksdb/db/table_cache.h +30 -6
  133. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -7
  134. package/deps/rocksdb/rocksdb/db/version_builder.cc +11 -50
  135. package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
  136. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +2 -1
  137. package/deps/rocksdb/rocksdb/db/version_edit.cc +51 -2
  138. package/deps/rocksdb/rocksdb/db/version_edit.h +91 -29
  139. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -7
  140. package/deps/rocksdb/rocksdb/db/version_set.cc +211 -50
  141. package/deps/rocksdb/rocksdb/db/version_set.h +40 -3
  142. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
  143. package/deps/rocksdb/rocksdb/db/version_set_test.cc +294 -21
  144. package/deps/rocksdb/rocksdb/db/version_util.cc +96 -0
  145. package/deps/rocksdb/rocksdb/db/version_util.h +24 -0
  146. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +5 -5
  147. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +647 -31
  148. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +219 -1
  149. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +549 -12
  150. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -3
  151. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +1 -1
  152. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -0
  153. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +21 -4
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +32 -0
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +74 -22
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +9 -0
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -61
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -2
  159. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +76 -2
  160. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +92 -72
  161. package/deps/rocksdb/rocksdb/env/env.cc +1 -0
  162. package/deps/rocksdb/rocksdb/env/env_test.cc +365 -2
  163. package/deps/rocksdb/rocksdb/env/fs_posix.cc +31 -30
  164. package/deps/rocksdb/rocksdb/env/io_posix.cc +8 -11
  165. package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
  166. package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
  167. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -1
  168. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +108 -0
  169. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +32 -4
  170. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +4 -4
  171. package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
  172. package/deps/rocksdb/rocksdb/file/file_util.h +2 -1
  173. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +331 -12
  174. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +52 -35
  175. package/deps/rocksdb/rocksdb/folly.mk +22 -5
  176. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +1 -1
  177. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +100 -54
  178. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +67 -2
  179. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +149 -13
  180. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -12
  181. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +78 -97
  182. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +3 -3
  183. package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +2 -2
  184. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +5 -0
  185. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +17 -2
  186. package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +1 -1
  187. package/deps/rocksdb/rocksdb/include/rocksdb/io_dispatcher.h +358 -0
  188. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +13 -0
  189. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +43 -0
  190. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +20 -0
  191. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +63 -21
  192. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +10 -1
  193. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +1 -1
  194. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +2 -7
  195. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +13 -0
  196. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -14
  197. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +49 -9
  198. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +8 -0
  199. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +77 -6
  200. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +15 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +16 -10
  202. package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +5 -5
  203. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +2 -4
  204. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +106 -46
  205. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +1 -1
  206. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +14 -1
  207. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +5 -1
  208. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +2 -1
  209. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -9
  210. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  211. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -2
  212. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +2 -2
  213. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +226 -8
  214. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +490 -0
  215. package/deps/rocksdb/rocksdb/memtable/skiplist.h +3 -3
  216. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +11 -0
  217. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +4 -12
  218. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +5 -5
  219. package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +21 -4
  220. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +9 -3
  221. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +21 -2
  222. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +2 -2
  223. package/deps/rocksdb/rocksdb/options/cf_options.cc +21 -1
  224. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  225. package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -2
  226. package/deps/rocksdb/rocksdb/options/db_options.cc +26 -5
  227. package/deps/rocksdb/rocksdb/options/db_options.h +3 -1
  228. package/deps/rocksdb/rocksdb/options/options.cc +5 -1
  229. package/deps/rocksdb/rocksdb/options/options_helper.cc +7 -2
  230. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +109 -103
  231. package/deps/rocksdb/rocksdb/options/options_test.cc +14 -0
  232. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +15 -17
  233. package/deps/rocksdb/rocksdb/port/lang.h +4 -0
  234. package/deps/rocksdb/rocksdb/port/port_example.h +0 -23
  235. package/deps/rocksdb/rocksdb/port/stack_trace.cc +36 -0
  236. package/deps/rocksdb/rocksdb/port/stack_trace.h +9 -0
  237. package/deps/rocksdb/rocksdb/src.mk +12 -0
  238. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +1 -2
  239. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  240. package/deps/rocksdb/rocksdb/table/block_based/block.cc +571 -292
  241. package/deps/rocksdb/rocksdb/table/block_based/block.h +143 -53
  242. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +154 -90
  243. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +5 -1
  244. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +51 -14
  245. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +0 -2
  246. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +147 -734
  247. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +30 -233
  248. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +178 -108
  249. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +13 -0
  250. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +17 -4
  251. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -2
  252. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +70 -0
  253. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +168 -24
  254. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -9
  255. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +7 -4
  256. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +9 -2
  257. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +548 -169
  258. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
  259. package/deps/rocksdb/rocksdb/table/block_based/block_util.h +156 -0
  260. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +73 -30
  261. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +74 -7
  262. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +1 -1
  263. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +20 -14
  264. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +22 -12
  265. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +1 -1
  266. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.cc +332 -0
  267. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.h +133 -0
  268. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -2
  269. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
  270. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +3 -2
  271. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +4 -1
  272. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +0 -1
  273. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +126 -46
  274. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +31 -3
  275. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +1 -2
  276. package/deps/rocksdb/rocksdb/table/cleanable_test.cc +3 -1
  277. package/deps/rocksdb/rocksdb/table/external_table.cc +25 -4
  278. package/deps/rocksdb/rocksdb/table/format.cc +27 -15
  279. package/deps/rocksdb/rocksdb/table/format.h +41 -15
  280. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -0
  281. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +22 -12
  282. package/deps/rocksdb/rocksdb/table/meta_blocks.h +0 -1
  283. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +7 -21
  284. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
  285. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +88 -13
  286. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +53 -42
  287. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +3 -12
  288. package/deps/rocksdb/rocksdb/table/table_builder.h +0 -4
  289. package/deps/rocksdb/rocksdb/table/table_properties.cc +18 -0
  290. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -3
  291. package/deps/rocksdb/rocksdb/table/table_test.cc +848 -172
  292. package/deps/rocksdb/rocksdb/table/unique_id.cc +24 -20
  293. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +8 -8
  294. package/deps/rocksdb/rocksdb/test_util/sync_point.h +5 -4
  295. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -1
  296. package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -2
  297. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +2 -1
  298. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +238 -120
  299. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +2 -2
  300. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +2 -4
  301. package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +4 -8
  302. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +1 -1
  303. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +2 -3
  304. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +82 -20
  305. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +41 -47
  306. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -0
  307. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +5 -6
  308. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +1 -1
  309. package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +6 -5
  310. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +4 -4
  311. package/deps/rocksdb/rocksdb/tools/write_stress.cc +1 -3
  312. package/deps/rocksdb/rocksdb/util/atomic.h +30 -23
  313. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +6 -7
  314. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +3 -3
  315. package/deps/rocksdb/rocksdb/util/bit_fields.h +68 -46
  316. package/deps/rocksdb/rocksdb/util/bloom_impl.h +16 -16
  317. package/deps/rocksdb/rocksdb/util/coding.h +14 -27
  318. package/deps/rocksdb/rocksdb/util/compression.cc +365 -207
  319. package/deps/rocksdb/rocksdb/util/compression.h +16 -1298
  320. package/deps/rocksdb/rocksdb/util/compression_test.cc +347 -61
  321. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +8 -9
  322. package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +1 -1
  323. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +1 -1
  324. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +3 -3
  325. package/deps/rocksdb/rocksdb/util/filter_bench.cc +18 -18
  326. package/deps/rocksdb/rocksdb/util/gflags_compat.h +3 -3
  327. package/deps/rocksdb/rocksdb/util/hash_test.cc +19 -7
  328. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.cc +1099 -0
  329. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.h +36 -0
  330. package/deps/rocksdb/rocksdb/util/io_dispatcher_test.cc +1919 -0
  331. package/deps/rocksdb/rocksdb/util/math.h +3 -1
  332. package/deps/rocksdb/rocksdb/util/mutexlock.h +19 -19
  333. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +25 -25
  334. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +5 -7
  335. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -5
  336. package/deps/rocksdb/rocksdb/util/slice.cc +0 -10
  337. package/deps/rocksdb/rocksdb/util/slice_test.cc +35 -1
  338. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +5 -7
  339. package/deps/rocksdb/rocksdb/util/status.cc +3 -1
  340. package/deps/rocksdb/rocksdb/util/stop_watch.h +2 -0
  341. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -1
  342. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +123 -78
  343. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +12 -93
  344. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -4
  345. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +0 -21
  346. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +6 -48
  347. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +94 -307
  348. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +12 -58
  349. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +2 -8
  350. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +2 -3
  351. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +205 -811
  352. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +18 -9
  353. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +2 -7
  354. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +1 -9
  355. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +17 -11
  356. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +1 -1
  357. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +1 -1
  358. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
  359. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +68 -61
  360. package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -1
  361. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +105 -59
  362. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -7
  363. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs_test.cc +94 -0
  364. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +13 -17
  365. package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +16 -3
  366. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +25 -25
  367. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +40 -40
  368. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +2 -5
  369. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +17 -19
  370. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +2 -2
  371. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +2 -2
  372. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +1 -1
  373. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +2 -2
  374. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +4 -13
  375. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +3 -3
  376. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
  377. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_seqno_test.cc +431 -0
  378. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -2
  379. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +91 -0
  380. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.cc +562 -0
  381. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.h +615 -0
  382. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.cc +2575 -0
  383. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.h +685 -0
  384. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_db_test.cc +2843 -0
  385. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.cc +567 -0
  386. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.h +275 -0
  387. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_test.cc +5183 -0
  388. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +4 -3
  389. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  390. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
  391. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +3 -3
  392. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +93 -88
  393. package/deps/rocksdb/rocksdb.gyp +7 -0
  394. package/index.js +70 -10
  395. package/iterator.js +25 -3
  396. package/max_rev_operator.h +9 -5
  397. package/package.json +1 -1
  398. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  399. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  400. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +0 -43
  401. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +0 -55
@@ -12,6 +12,7 @@
12
12
  #include "db/db_impl/db_impl.h"
13
13
  #include "db/error_handler.h"
14
14
  #include "db/periodic_task_scheduler.h"
15
+ #include "db/version_util.h"
15
16
  #include "env/composite_env_wrapper.h"
16
17
  #include "file/filename.h"
17
18
  #include "file/read_write_util.h"
@@ -657,7 +658,8 @@ Status DBImpl::Recover(
657
658
  f->file_creation_time, f->epoch_number,
658
659
  f->file_checksum, f->file_checksum_func_name,
659
660
  f->unique_id, f->compensated_range_deletion_size,
660
- f->tail_size, f->user_defined_timestamps_persisted);
661
+ f->tail_size, f->user_defined_timestamps_persisted,
662
+ f->min_timestamp, f->max_timestamp);
661
663
  ROCKS_LOG_WARN(immutable_db_options_.info_log,
662
664
  "[%s] Moving #%" PRIu64
663
665
  " from from_level-%d to from_level-%d %" PRIu64
@@ -1491,8 +1493,8 @@ Status DBImpl::ProcessLogRecord(
1491
1493
  }
1492
1494
 
1493
1495
  assert(process_status.ok());
1494
- process_status = InsertLogRecordToMemtable(batch_to_use, wal_number,
1495
- next_sequence, &has_valid_writes);
1496
+ process_status = InsertLogRecordToMemtable(
1497
+ batch_to_use, wal_number, next_sequence, &has_valid_writes, read_only);
1496
1498
  MaybeIgnoreError(&process_status);
1497
1499
  // We are treating this as a failure while reading since we read valid
1498
1500
  // blocks that do not form coherent data
@@ -1570,7 +1572,8 @@ void DBImpl::MaybeReviseStopReplayForCorruption(
1570
1572
  Status DBImpl::InsertLogRecordToMemtable(WriteBatch* batch_to_use,
1571
1573
  uint64_t wal_number,
1572
1574
  SequenceNumber* next_sequence,
1573
- bool* has_valid_writes) {
1575
+ bool* has_valid_writes,
1576
+ bool read_only) {
1574
1577
  // If column family was not found, it might mean that the WAL write
1575
1578
  // batch references to the column family that was dropped after the
1576
1579
  // insert. We don't want to fail the whole write batch in that case --
@@ -1583,6 +1586,34 @@ Status DBImpl::InsertLogRecordToMemtable(WriteBatch* batch_to_use,
1583
1586
  &trim_history_scheduler_, true, wal_number, this,
1584
1587
  false /* concurrent_memtable_writes */, next_sequence, has_valid_writes,
1585
1588
  seq_per_batch_, batch_per_txn_);
1589
+
1590
+ // Check WriteBufferManager global limit during recovery.
1591
+ // When multiple RocksDB instances share a WriteBufferManager, a recovering
1592
+ // instance could exceed the global memory limit. Schedule flushes when needed
1593
+ // to prevent OOM during WAL recovery.
1594
+ //
1595
+ // Skip scheduling in read-only mode since flushes cannot be performed and
1596
+ // the scheduler would never be drained, causing assertion failures on
1597
+ // duplicate ScheduleWork() calls.
1598
+ //
1599
+ // TODO: Currently we schedule all CFs with non-empty memtables for flush
1600
+ // (similar to the atomic_flush=false path in the normal write flow). This
1601
+ // may produce more, smaller L0 files in some CFs. A future improvement
1602
+ // could flush only the oldest CF or pick CFs more selectively to reduce
1603
+ // unnecessary L0 file creation.
1604
+ if (status.ok() && *has_valid_writes && !read_only &&
1605
+ immutable_db_options_.enforce_write_buffer_manager_during_recovery &&
1606
+ write_buffer_manager_ != nullptr &&
1607
+ write_buffer_manager_->ShouldFlush()) {
1608
+ for (auto cfd : *versions_->GetColumnFamilySet()) {
1609
+ if (cfd->mem() != nullptr && cfd->mem()->GetFirstSequenceNumber() != 0 &&
1610
+ !cfd->mem()->HasFlushScheduled()) {
1611
+ cfd->mem()->MarkFlushScheduled();
1612
+ flush_scheduler_.ScheduleWork(cfd);
1613
+ }
1614
+ }
1615
+ }
1616
+
1586
1617
  return status;
1587
1618
  }
1588
1619
 
@@ -2263,7 +2294,7 @@ Status DB::OpenAndTrimHistory(
2263
2294
  return s;
2264
2295
  }
2265
2296
 
2266
- DB* db = nullptr;
2297
+ std::unique_ptr<DB> db;
2267
2298
  s = DB::Open(db_options, dbname, column_families, handles, &db);
2268
2299
  if (!s.ok()) {
2269
2300
  return s;
@@ -2272,7 +2303,7 @@ Status DB::OpenAndTrimHistory(
2272
2303
  CompactRangeOptions options;
2273
2304
  options.bottommost_level_compaction =
2274
2305
  BottommostLevelCompaction::kForceOptimized;
2275
- auto db_impl = static_cast_with_check<DBImpl>(db);
2306
+ auto db_impl = static_cast_with_check<DBImpl>(db.get());
2276
2307
  for (auto handle : *handles) {
2277
2308
  assert(handle != nullptr);
2278
2309
  auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(handle);
@@ -2294,14 +2325,14 @@ Status DB::OpenAndTrimHistory(
2294
2325
  assert(temp_s.ok());
2295
2326
  }
2296
2327
  handles->clear();
2297
- delete db;
2328
+ db.reset();
2298
2329
  };
2299
2330
  if (!s.ok()) {
2300
2331
  clean_op();
2301
2332
  return s;
2302
2333
  }
2303
2334
 
2304
- dbptr->reset(db);
2335
+ *dbptr = std::move(db);
2305
2336
  return s;
2306
2337
  }
2307
2338
 
@@ -2656,6 +2687,10 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
2656
2687
  impl->DeleteObsoleteFiles();
2657
2688
  TEST_SYNC_POINT("DBImpl::Open:AfterDeleteFiles");
2658
2689
  impl->MaybeScheduleFlushOrCompaction();
2690
+
2691
+ if (impl->immutable_db_options_.open_files_async) {
2692
+ impl->ScheduleAsyncFileOpening();
2693
+ }
2659
2694
  impl->mutex_.Unlock();
2660
2695
  }
2661
2696
 
@@ -2704,4 +2739,125 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
2704
2739
  }
2705
2740
  return s;
2706
2741
  }
2742
+
2743
+ struct AsyncFileOpenContext {
2744
+ DBImpl* db = nullptr;
2745
+ FileOptions file_options;
2746
+ std::vector<Version*> versions;
2747
+
2748
+ AsyncFileOpenContext() = default;
2749
+ AsyncFileOpenContext(const AsyncFileOpenContext&) = delete;
2750
+ AsyncFileOpenContext& operator=(const AsyncFileOpenContext&) = delete;
2751
+ AsyncFileOpenContext(AsyncFileOpenContext&&) = delete;
2752
+ AsyncFileOpenContext& operator=(AsyncFileOpenContext&&) = delete;
2753
+
2754
+ ~AsyncFileOpenContext() {
2755
+ db->mutex()->AssertHeld();
2756
+ for (auto* v : versions) {
2757
+ // must unref version before cfd
2758
+ ColumnFamilyData* cfd = v->cfd();
2759
+ v->Unref();
2760
+ cfd->UnrefAndTryDelete();
2761
+ }
2762
+ }
2763
+ };
2764
+
2765
+ void DBImpl::ScheduleAsyncFileOpening() {
2766
+ mutex_.AssertHeld();
2767
+
2768
+ auto* ctx = new AsyncFileOpenContext();
2769
+ ctx->db = this;
2770
+ ctx->file_options = versions_->file_options();
2771
+
2772
+ for (auto cfd : *versions_->GetColumnFamilySet()) {
2773
+ assert(!cfd->IsDropped());
2774
+ Version* current = cfd->current();
2775
+ VersionStorageInfo* vstorage = current->storage_info();
2776
+ bool has_files = false;
2777
+ for (int level = 0; level < vstorage->num_levels() && !has_files; level++) {
2778
+ has_files = !vstorage->LevelFiles(level).empty();
2779
+ }
2780
+ if (has_files) {
2781
+ cfd->Ref();
2782
+ current->Ref();
2783
+ ctx->versions.push_back(current);
2784
+ }
2785
+ }
2786
+
2787
+ bg_async_file_open_state_ = AsyncFileOpenState::kScheduled;
2788
+
2789
+ // since this is a one time job, best to schedule it with high priority
2790
+ env_->Schedule(&DBImpl::BGWorkAsyncFileOpen, ctx, Env::Priority::HIGH,
2791
+ nullptr);
2792
+ }
2793
+
2794
+ void DBImpl::MarkAsyncFileOpenNotNeeded() {
2795
+ mutex_.AssertHeld();
2796
+ assert(bg_async_file_open_state_ == AsyncFileOpenState::kNotScheduled);
2797
+ bg_async_file_open_state_ = AsyncFileOpenState::kComplete;
2798
+ }
2799
+
2800
+ void DBImpl::BGWorkAsyncFileOpen(void* arg) {
2801
+ TEST_SYNC_POINT("DBImpl::BGWorkAsyncFileOpen::Start");
2802
+
2803
+ AsyncFileOpenContext* raw_ctx = static_cast<AsyncFileOpenContext*>(arg);
2804
+ DBImpl* db = raw_ctx->db;
2805
+
2806
+ auto deleter = [](AsyncFileOpenContext* p) {
2807
+ auto* dbPtr = p->db;
2808
+ InstrumentedMutexLock l(&dbPtr->mutex_);
2809
+ delete p;
2810
+ dbPtr->bg_async_file_open_state_ = AsyncFileOpenState::kComplete;
2811
+ dbPtr->bg_cv_.SignalAll();
2812
+ };
2813
+ std::unique_ptr<AsyncFileOpenContext, decltype(deleter)> ctx(raw_ctx,
2814
+ deleter);
2815
+
2816
+ ReadOptions ro;
2817
+ for (size_t i = 0; i < ctx->versions.size(); i++) {
2818
+ auto* version = ctx->versions[i];
2819
+ ColumnFamilyData* cfd = version->cfd();
2820
+
2821
+ // Skip column families that were dropped after scheduling
2822
+ if (cfd->IsDropped()) {
2823
+ continue;
2824
+ }
2825
+
2826
+ VersionStorageInfo* vstorage = version->storage_info();
2827
+
2828
+ MutableCFOptions mutable_cf_options;
2829
+ {
2830
+ InstrumentedMutexLock l(&db->mutex_);
2831
+ mutable_cf_options = cfd->GetLatestMutableCFOptions();
2832
+ }
2833
+ size_t max_file_size_for_l0_meta_pin =
2834
+ MaxFileSizeForL0MetaPin(mutable_cf_options);
2835
+
2836
+ std::vector<std::pair<FileMetaData*, int>> files_meta;
2837
+ for (int level = 0; level < vstorage->num_levels(); level++) {
2838
+ for (FileMetaData* file_meta : vstorage->LevelFiles(level)) {
2839
+ files_meta.emplace_back(file_meta, level);
2840
+ }
2841
+ }
2842
+
2843
+ Status s = LoadTableHandlersHelper(
2844
+ files_meta, cfd->table_cache(), ctx->file_options,
2845
+ *vstorage->InternalComparator(), cfd->internal_stats(),
2846
+ db->immutable_db_options_.max_file_opening_threads,
2847
+ false /* prefetch_index_and_filter_in_cache */, mutable_cf_options,
2848
+ max_file_size_for_l0_meta_pin, ro, &db->shutting_down_);
2849
+ if (!s.ok()) {
2850
+ ROCKS_LOG_ERROR(
2851
+ db->immutable_db_options_.info_log,
2852
+ "BGWorkAsyncFileOpen: LoadTableHandlers failed for CF %s: "
2853
+ "%s",
2854
+ cfd->GetName().c_str(), s.ToString().c_str());
2855
+ InstrumentedMutexLock l(&db->mutex_);
2856
+ db->error_handler_.SetBGError(s, BackgroundErrorReason::kAsyncFileOpen);
2857
+ break;
2858
+ }
2859
+ }
2860
+ TEST_SYNC_POINT("DBImpl::BGWorkAsyncFileOpen:Done");
2861
+ }
2862
+
2707
2863
  } // namespace ROCKSDB_NAMESPACE
@@ -357,6 +357,12 @@ Status DBImplReadOnly::OpenForReadOnlyWithoutCheck(
357
357
  sv_context.NewSuperVersion();
358
358
  cfd->InstallSuperVersion(&sv_context, &impl->mutex_);
359
359
  }
360
+
361
+ impl->opened_successfully_ = true;
362
+
363
+ if (db_options.open_files_async) {
364
+ impl->ScheduleAsyncFileOpening();
365
+ }
360
366
  }
361
367
  impl->mutex_.Unlock();
362
368
  sv_context.Clean();
@@ -121,6 +121,11 @@ class DBImplReadOnly : public DBImpl {
121
121
  return Status::NotSupported("Not supported operation in read only mode.");
122
122
  }
123
123
 
124
+ using DBImpl::FlushWAL;
125
+ Status FlushWAL(const FlushWALOptions& /*options*/) override {
126
+ return Status::NotSupported("Not supported operation in read only mode.");
127
+ }
128
+
124
129
  using DB::IngestExternalFile;
125
130
  Status IngestExternalFile(
126
131
  ColumnFamilyHandle* /*column_family*/,
@@ -43,7 +43,6 @@ Status DBImplSecondary::Recover(
43
43
  RecoveryContext* /*recovery_ctx*/, bool* /*can_retry*/) {
44
44
  mutex_.AssertHeld();
45
45
 
46
- JobContext job_context(0);
47
46
  Status s;
48
47
  s = static_cast<ReactiveVersionSet*>(versions_.get())
49
48
  ->Recover(column_families, &manifest_reader_, &manifest_reporter_,
@@ -61,24 +60,12 @@ Status DBImplSecondary::Recover(
61
60
  max_total_in_memory_state_ += mutable_cf_options.write_buffer_size *
62
61
  mutable_cf_options.max_write_buffer_number;
63
62
  }
64
- if (s.ok()) {
65
- default_cf_handle_ = new ColumnFamilyHandleImpl(
66
- versions_->GetColumnFamilySet()->GetDefault(), this, &mutex_);
67
- default_cf_internal_stats_ = default_cf_handle_->cfd()->internal_stats();
63
+ default_cf_handle_ = new ColumnFamilyHandleImpl(
64
+ versions_->GetColumnFamilySet()->GetDefault(), this, &mutex_);
65
+ default_cf_internal_stats_ = default_cf_handle_->cfd()->internal_stats();
68
66
 
69
- std::unordered_set<ColumnFamilyData*> cfds_changed;
70
- s = FindAndRecoverLogFiles(&cfds_changed, &job_context);
71
- }
72
-
73
- if (s.IsPathNotFound()) {
74
- ROCKS_LOG_INFO(immutable_db_options_.info_log,
75
- "Secondary tries to read WAL, but WAL file(s) have already "
76
- "been purged by primary.");
77
- s = Status::OK();
78
- }
79
67
  // TODO: update options_file_number_ needed?
80
68
 
81
- job_context.Clean();
82
69
  return s;
83
70
  }
84
71
 
@@ -88,6 +75,7 @@ Status DBImplSecondary::FindAndRecoverLogFiles(
88
75
  JobContext* job_context) {
89
76
  assert(nullptr != cfds_changed);
90
77
  assert(nullptr != job_context);
78
+ TEST_SYNC_POINT("DBImplSecondary::FindAndRecoverLogFiles:Begin");
91
79
  Status s;
92
80
  std::vector<uint64_t> logs;
93
81
  s = FindNewLogNumbers(&logs);
@@ -509,10 +497,6 @@ Iterator* DBImplSecondary::NewIterator(const ReadOptions& _read_options,
509
497
  if (read_options.io_activity == Env::IOActivity::kUnknown) {
510
498
  read_options.io_activity = Env::IOActivity::kDBIterator;
511
499
  }
512
- if (read_options.managed) {
513
- return NewErrorIterator(
514
- Status::NotSupported("Managed iterator is not supported anymore."));
515
- }
516
500
  if (read_options.read_tier == kPersistedTier) {
517
501
  return NewErrorIterator(Status::NotSupported(
518
502
  "ReadTier::kPersistedData is not yet supported in iterators."));
@@ -588,9 +572,6 @@ Status DBImplSecondary::NewIterators(
588
572
  if (read_options.io_activity == Env::IOActivity::kUnknown) {
589
573
  read_options.io_activity = Env::IOActivity::kDBIterator;
590
574
  }
591
- if (read_options.managed) {
592
- return Status::NotSupported("Managed iterator is not supported anymore.");
593
- }
594
575
  if (read_options.read_tier == kPersistedTier) {
595
576
  return Status::NotSupported(
596
577
  "ReadTier::kPersistedData is not yet supported in iterators.");
@@ -747,6 +728,17 @@ Status DB::OpenAsSecondary(
747
728
  const std::string& secondary_path,
748
729
  const std::vector<ColumnFamilyDescriptor>& column_families,
749
730
  std::vector<ColumnFamilyHandle*>* handles, std::unique_ptr<DB>* dbptr) {
731
+ return DBImplSecondary::OpenAsSecondaryImpl(
732
+ db_options, dbname, secondary_path, column_families, handles, dbptr,
733
+ /*recover_wal=*/true);
734
+ }
735
+
736
+ Status DBImplSecondary::OpenAsSecondaryImpl(
737
+ const DBOptions& db_options, const std::string& dbname,
738
+ const std::string& secondary_path,
739
+ const std::vector<ColumnFamilyDescriptor>& column_families,
740
+ std::vector<ColumnFamilyHandle*>* handles, std::unique_ptr<DB>* dbptr,
741
+ bool recover_wal) {
750
742
  *dbptr = nullptr;
751
743
 
752
744
  DBOptions tmp_opts(db_options);
@@ -791,7 +783,21 @@ Status DB::OpenAsSecondary(
791
783
  impl->wal_in_db_path_ = impl->immutable_db_options_.IsWalDirSameAsDBPath();
792
784
 
793
785
  impl->mutex_.Lock();
786
+ JobContext job_context(0);
794
787
  s = impl->Recover(column_families, true, false, false);
788
+ // WAL recovery is optional: DB::OpenAsSecondary() needs it to replay
789
+ // memtable data, while DB::OpenAndCompact() skips it since remote
790
+ // compaction only needs LSM state from MANIFEST.
791
+ if (s.ok() && recover_wal) {
792
+ std::unordered_set<ColumnFamilyData*> cfds_changed;
793
+ s = impl->FindAndRecoverLogFiles(&cfds_changed, &job_context);
794
+ if (s.IsPathNotFound()) {
795
+ ROCKS_LOG_INFO(impl->immutable_db_options_.info_log,
796
+ "Secondary tries to read WAL, but WAL file(s) have "
797
+ "already been purged by primary.");
798
+ s = Status::OK();
799
+ }
800
+ }
795
801
  if (s.ok()) {
796
802
  for (const auto& cf : column_families) {
797
803
  auto cfd =
@@ -809,9 +815,11 @@ Status DB::OpenAsSecondary(
809
815
  sv_context.NewSuperVersion();
810
816
  cfd->InstallSuperVersion(&sv_context, &impl->mutex_);
811
817
  }
818
+ impl->MarkAsyncFileOpenNotNeeded();
812
819
  }
813
820
  impl->mutex_.Unlock();
814
821
  sv_context.Clean();
822
+ job_context.Clean();
815
823
  if (s.ok()) {
816
824
  dbptr->reset(impl);
817
825
  for (auto h : *handles) {
@@ -987,11 +995,8 @@ Status DBImplSecondary::ParseCompactionProgressFile(
987
995
  Slice slice;
988
996
  std::string record;
989
997
 
990
- while (compaction_progress_reader.ReadRecord(&slice, &record)) {
991
- if (!reader_status.ok()) {
992
- return reader_status;
993
- }
994
-
998
+ while (compaction_progress_reader.ReadRecord(&slice, &record) &&
999
+ reader_status.ok()) {
995
1000
  VersionEdit edit;
996
1001
  s = edit.DecodeFrom(slice);
997
1002
  if (!s.ok()) {
@@ -1004,6 +1009,10 @@ Status DBImplSecondary::ParseCompactionProgressFile(
1004
1009
  }
1005
1010
  }
1006
1011
 
1012
+ if (!reader_status.ok()) {
1013
+ return reader_status;
1014
+ }
1015
+
1007
1016
  if (!s.ok()) {
1008
1017
  return s;
1009
1018
  }
@@ -1544,18 +1553,20 @@ Status DB::OpenAndCompact(
1544
1553
  }
1545
1554
  }
1546
1555
 
1547
- // 5. Open db As Secondary
1548
- DB* db;
1556
+ // 5. Open db As Secondary (skip WAL recovery — remote compaction only
1557
+ // needs LSM state from MANIFEST, not memtable data from WAL replay)
1558
+ std::unique_ptr<DB> db;
1549
1559
  std::vector<ColumnFamilyHandle*> handles;
1550
- s = DB::OpenAsSecondary(db_options, name, output_directory, column_families,
1551
- &handles, &db);
1560
+ s = DBImplSecondary::OpenAsSecondaryImpl(db_options, name, output_directory,
1561
+ column_families, &handles, &db,
1562
+ /*recover_wal=*/false);
1552
1563
  if (!s.ok()) {
1553
1564
  return s;
1554
1565
  }
1555
1566
  assert(db);
1556
1567
 
1557
1568
  TEST_SYNC_POINT_CALLBACK(
1558
- "DBImplSecondary::OpenAndCompact::AfterOpenAsSecondary:0", db);
1569
+ "DBImplSecondary::OpenAndCompact::AfterOpenAsSecondary:0", db.get());
1559
1570
 
1560
1571
  // 6. Find the handle of the Column Family that this will compact
1561
1572
  ColumnFamilyHandle* cfh = nullptr;
@@ -1570,7 +1581,8 @@ Status DB::OpenAndCompact(
1570
1581
  // 7. Run the compaction without installation.
1571
1582
  // Output will be stored in the directory specified by output_directory
1572
1583
  CompactionServiceResult compaction_result;
1573
- DBImplSecondary* db_secondary = static_cast_with_check<DBImplSecondary>(db);
1584
+ DBImplSecondary* db_secondary =
1585
+ static_cast_with_check<DBImplSecondary>(db.get());
1574
1586
  s = db_secondary->CompactWithoutInstallation(options, cfh, compaction_input,
1575
1587
  &compaction_result);
1576
1588
 
@@ -1581,7 +1593,7 @@ Status DB::OpenAndCompact(
1581
1593
  for (auto& handle : handles) {
1582
1594
  delete handle;
1583
1595
  }
1584
- delete db;
1596
+ db.reset();
1585
1597
  if (s.ok()) {
1586
1598
  return serialization_status;
1587
1599
  } else {
@@ -78,8 +78,9 @@ class DBImplSecondary : public DBImpl {
78
78
  std::string secondary_path);
79
79
  ~DBImplSecondary() override;
80
80
 
81
- // Recover by replaying MANIFEST and WAL. Also initialize manifest_reader_
82
- // and log_readers_ to facilitate future operations.
81
+ // Recover by replaying MANIFEST only. Also initialize manifest_reader_
82
+ // to facilitate future operations. WAL recovery, if needed, is done
83
+ // separately after opening (see DB::OpenAsSecondary).
83
84
  Status Recover(const std::vector<ColumnFamilyDescriptor>& column_families,
84
85
  bool read_only, bool error_if_wal_file_exists,
85
86
  bool error_if_data_exists_in_wals, bool is_retry = false,
@@ -194,10 +195,11 @@ class DBImplSecondary : public DBImpl {
194
195
  return Status::NotSupported("Not supported operation in secondary mode.");
195
196
  }
196
197
 
197
- Status GetLiveFiles(std::vector<std::string>&,
198
- uint64_t* /*manifest_file_size*/,
199
- bool /*flush_memtable*/ = true) override {
200
- return Status::NotSupported("Not supported operation in secondary mode.");
198
+ Status GetLiveFiles(std::vector<std::string>& ret,
199
+ uint64_t* manifest_file_size,
200
+ bool /*flush_memtable*/) override {
201
+ return DBImpl::GetLiveFiles(ret, manifest_file_size,
202
+ false /* flush_memtable */);
201
203
  }
202
204
 
203
205
  using DBImpl::Flush;
@@ -216,9 +218,9 @@ class DBImplSecondary : public DBImpl {
216
218
 
217
219
  using DBImpl::SetOptions;
218
220
  Status SetOptions(
219
- ColumnFamilyHandle* /*cfd*/,
220
- const std::unordered_map<std::string, std::string>& /*options_map*/)
221
- override {
221
+ const std::unordered_map<ColumnFamilyHandle*,
222
+ std::unordered_map<std::string, std::string>>&
223
+ /*column_families_opts_map*/) override {
222
224
  // Currently not supported because changing certain options may cause
223
225
  // flush/compaction and/or write to MANIFEST.
224
226
  return Status::NotSupported("Not supported operation in secondary mode.");
@@ -384,6 +386,17 @@ class DBImplSecondary : public DBImpl {
384
386
  uint64_t CalculateResumedCompactionBytes(
385
387
  const CompactionProgress& compaction_progress) const;
386
388
 
389
+ // Internal helper for opening a secondary instance. Recover() replays
390
+ // MANIFEST only. When recover_wal is true, WAL files are also replayed
391
+ // (needed by DB::OpenAsSecondary). When false, WAL replay is skipped
392
+ // (used by DB::OpenAndCompact which only needs LSM state).
393
+ static Status OpenAsSecondaryImpl(
394
+ const DBOptions& db_options, const std::string& dbname,
395
+ const std::string& secondary_path,
396
+ const std::vector<ColumnFamilyDescriptor>& column_families,
397
+ std::vector<ColumnFamilyHandle*>* handles, std::unique_ptr<DB>* dbptr,
398
+ bool recover_wal);
399
+
387
400
  // Cache log readers for each log number, used to continue WAL replay
388
401
  // after recovery
389
402
  std::map<uint64_t, std::unique_ptr<LogReaderContainer>> log_readers_;
@@ -23,6 +23,7 @@
23
23
  #include "memory/arena.h"
24
24
  #include "monitoring/perf_context_imp.h"
25
25
  #include "rocksdb/env.h"
26
+ #include "rocksdb/io_dispatcher.h"
26
27
  #include "rocksdb/iterator.h"
27
28
  #include "rocksdb/merge_operator.h"
28
29
  #include "rocksdb/options.h"
@@ -1621,6 +1622,14 @@ void DBIter::Prepare(const MultiScanArgs& scan_opts) {
1621
1622
  new_scan_opts.emplace(scan_opts);
1622
1623
  scan_opts_.swap(new_scan_opts);
1623
1624
  scan_index_ = 0;
1625
+
1626
+ // Create a shared IODispatcher if not provided. This allows all
1627
+ // BlockBasedTableIterators in this scan to share a single dispatcher,
1628
+ // enabling better IO coordination and future rate limiting.
1629
+ if (!scan_opts_.value().io_dispatcher) {
1630
+ scan_opts_->io_dispatcher.reset(NewIODispatcher());
1631
+ }
1632
+
1624
1633
  if (!scan_opts.empty()) {
1625
1634
  iter_.Prepare(&scan_opts_.value());
1626
1635
  } else {