@nxtedition/rocksdb 15.4.1 → 15.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (399)
  1. package/binding.cc +24 -15
  2. package/deps/rocksdb/rocksdb/.clang-tidy +86 -0
  3. package/deps/rocksdb/rocksdb/BUCK +42 -0
  4. package/deps/rocksdb/rocksdb/CMakeLists.txt +11 -0
  5. package/deps/rocksdb/rocksdb/Makefile +59 -32
  6. package/deps/rocksdb/rocksdb/cache/cache.cc +0 -5
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +9 -9
  8. package/deps/rocksdb/rocksdb/cache/cache_key.cc +3 -3
  9. package/deps/rocksdb/rocksdb/cache/cache_key.h +5 -5
  10. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +16 -16
  11. package/deps/rocksdb/rocksdb/cache/cache_test.cc +1 -1
  12. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +258 -294
  13. package/deps/rocksdb/rocksdb/cache/clock_cache.h +98 -49
  14. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -5
  15. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +2 -3
  16. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +18 -18
  17. package/deps/rocksdb/rocksdb/crash_test.mk +5 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -22
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +6 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +14 -16
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +38 -26
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +101 -18
  24. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +12 -0
  25. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +6 -9
  26. package/deps/rocksdb/rocksdb/db/builder.cc +23 -0
  27. package/deps/rocksdb/rocksdb/db/builder.h +7 -0
  28. package/deps/rocksdb/rocksdb/db/c.cc +373 -57
  29. package/deps/rocksdb/rocksdb/db/c_test.c +101 -1
  30. package/deps/rocksdb/rocksdb/db/column_family.cc +31 -3
  31. package/deps/rocksdb/rocksdb/db/column_family_test.cc +10 -13
  32. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +35 -48
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +13 -5
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +201 -39
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -10
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +7 -7
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -455
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +19 -0
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +72 -9
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -10
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +405 -83
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +25 -1
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -10
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1410 -106
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -5
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +2 -1
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +19 -10
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +505 -45
  51. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
  52. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +9 -1
  53. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +4 -4
  54. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +7 -9
  55. package/deps/rocksdb/rocksdb/db/convenience.cc +4 -4
  56. package/deps/rocksdb/rocksdb/db/convenience_impl.h +2 -1
  57. package/deps/rocksdb/rocksdb/db/corruption_test.cc +60 -88
  58. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +10 -12
  59. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +471 -40
  60. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +116 -2
  61. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +5 -15
  62. package/deps/rocksdb/rocksdb/db/db_compaction_abort_test.cc +993 -0
  63. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +329 -29
  64. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +155 -13
  65. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +54 -31
  66. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -0
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +232 -70
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +57 -9
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +224 -31
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +4 -2
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +164 -8
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -0
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +5 -0
  77. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +47 -35
  78. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +22 -9
  79. package/deps/rocksdb/rocksdb/db/db_iter.cc +9 -0
  80. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +371 -6
  81. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -5
  82. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +22 -23
  83. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +0 -2
  84. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +4 -4
  85. package/deps/rocksdb/rocksdb/db/db_options_test.cc +40 -0
  86. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +32 -13
  87. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1 -1
  88. package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +4 -4
  89. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +68 -15
  90. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1 -1
  91. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +2 -3
  92. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -21
  93. package/deps/rocksdb/rocksdb/db/db_test.cc +644 -128
  94. package/deps/rocksdb/rocksdb/db/db_test2.cc +198 -81
  95. package/deps/rocksdb/rocksdb/db/db_test_util.cc +35 -10
  96. package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -2
  97. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +36 -32
  98. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +11 -7
  99. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +499 -0
  100. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +284 -20
  101. package/deps/rocksdb/rocksdb/db/db_write_test.cc +3 -3
  102. package/deps/rocksdb/rocksdb/db/dbformat.h +0 -5
  103. package/deps/rocksdb/rocksdb/db/error_handler.cc +24 -0
  104. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +12 -14
  105. package/deps/rocksdb/rocksdb/db/experimental.cc +13 -10
  106. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -1
  107. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +22 -3
  108. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +21 -15
  109. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +4 -6
  110. package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -3
  111. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +5 -6
  112. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +4 -2
  113. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +17 -17
  114. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -0
  115. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  116. package/deps/rocksdb/rocksdb/db/listener_test.cc +154 -27
  117. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -6
  118. package/deps/rocksdb/rocksdb/db/memtable.cc +197 -51
  119. package/deps/rocksdb/rocksdb/db/memtable.h +6 -0
  120. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +3 -4
  121. package/deps/rocksdb/rocksdb/db/merge_test.cc +37 -35
  122. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +2 -1
  123. package/deps/rocksdb/rocksdb/db/options_file_test.cc +4 -4
  124. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +9 -11
  125. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +10 -1
  126. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +292 -15
  127. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +10 -17
  128. package/deps/rocksdb/rocksdb/db/prefix_test.cc +6 -8
  129. package/deps/rocksdb/rocksdb/db/repair.cc +10 -10
  130. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -5
  131. package/deps/rocksdb/rocksdb/db/table_cache.cc +142 -135
  132. package/deps/rocksdb/rocksdb/db/table_cache.h +30 -6
  133. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -7
  134. package/deps/rocksdb/rocksdb/db/version_builder.cc +11 -50
  135. package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
  136. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +2 -1
  137. package/deps/rocksdb/rocksdb/db/version_edit.cc +51 -2
  138. package/deps/rocksdb/rocksdb/db/version_edit.h +91 -29
  139. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -7
  140. package/deps/rocksdb/rocksdb/db/version_set.cc +211 -50
  141. package/deps/rocksdb/rocksdb/db/version_set.h +40 -3
  142. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
  143. package/deps/rocksdb/rocksdb/db/version_set_test.cc +294 -21
  144. package/deps/rocksdb/rocksdb/db/version_util.cc +96 -0
  145. package/deps/rocksdb/rocksdb/db/version_util.h +24 -0
  146. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +5 -5
  147. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +647 -31
  148. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +219 -1
  149. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +549 -12
  150. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -3
  151. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +1 -1
  152. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -0
  153. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +21 -4
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +32 -0
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +74 -22
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +9 -0
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -61
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -2
  159. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +76 -2
  160. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +92 -72
  161. package/deps/rocksdb/rocksdb/env/env.cc +1 -0
  162. package/deps/rocksdb/rocksdb/env/env_test.cc +365 -2
  163. package/deps/rocksdb/rocksdb/env/fs_posix.cc +31 -30
  164. package/deps/rocksdb/rocksdb/env/io_posix.cc +8 -11
  165. package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
  166. package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
  167. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -1
  168. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +108 -0
  169. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +32 -4
  170. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +4 -4
  171. package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
  172. package/deps/rocksdb/rocksdb/file/file_util.h +2 -1
  173. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +331 -12
  174. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +52 -35
  175. package/deps/rocksdb/rocksdb/folly.mk +22 -5
  176. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +1 -1
  177. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +100 -54
  178. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +67 -2
  179. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +149 -13
  180. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -12
  181. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +78 -97
  182. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +3 -3
  183. package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +2 -2
  184. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +5 -0
  185. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +17 -2
  186. package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +1 -1
  187. package/deps/rocksdb/rocksdb/include/rocksdb/io_dispatcher.h +358 -0
  188. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +13 -0
  189. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +43 -0
  190. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +20 -0
  191. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +63 -21
  192. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +10 -1
  193. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +1 -1
  194. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +2 -7
  195. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +13 -0
  196. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -14
  197. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +49 -9
  198. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +8 -0
  199. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +77 -6
  200. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +15 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +16 -10
  202. package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +5 -5
  203. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +2 -4
  204. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +106 -46
  205. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +1 -1
  206. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +14 -1
  207. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +5 -1
  208. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +2 -1
  209. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -9
  210. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  211. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -2
  212. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +2 -2
  213. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +226 -8
  214. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +490 -0
  215. package/deps/rocksdb/rocksdb/memtable/skiplist.h +3 -3
  216. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +11 -0
  217. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +4 -12
  218. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +5 -5
  219. package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +21 -4
  220. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +9 -3
  221. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +21 -2
  222. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +2 -2
  223. package/deps/rocksdb/rocksdb/options/cf_options.cc +21 -1
  224. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  225. package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -2
  226. package/deps/rocksdb/rocksdb/options/db_options.cc +26 -5
  227. package/deps/rocksdb/rocksdb/options/db_options.h +3 -1
  228. package/deps/rocksdb/rocksdb/options/options.cc +5 -1
  229. package/deps/rocksdb/rocksdb/options/options_helper.cc +7 -2
  230. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +109 -103
  231. package/deps/rocksdb/rocksdb/options/options_test.cc +14 -0
  232. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +15 -17
  233. package/deps/rocksdb/rocksdb/port/lang.h +4 -0
  234. package/deps/rocksdb/rocksdb/port/port_example.h +0 -23
  235. package/deps/rocksdb/rocksdb/port/stack_trace.cc +36 -0
  236. package/deps/rocksdb/rocksdb/port/stack_trace.h +9 -0
  237. package/deps/rocksdb/rocksdb/src.mk +12 -0
  238. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +1 -2
  239. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  240. package/deps/rocksdb/rocksdb/table/block_based/block.cc +571 -292
  241. package/deps/rocksdb/rocksdb/table/block_based/block.h +143 -53
  242. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +154 -90
  243. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +5 -1
  244. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +51 -14
  245. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +0 -2
  246. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +147 -734
  247. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +30 -233
  248. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +178 -108
  249. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +13 -0
  250. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +17 -4
  251. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -2
  252. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +70 -0
  253. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +168 -24
  254. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -9
  255. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +7 -4
  256. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +9 -2
  257. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +548 -169
  258. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
  259. package/deps/rocksdb/rocksdb/table/block_based/block_util.h +156 -0
  260. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +73 -30
  261. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +74 -7
  262. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +1 -1
  263. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +20 -14
  264. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +22 -12
  265. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +1 -1
  266. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.cc +332 -0
  267. package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.h +133 -0
  268. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -2
  269. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
  270. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +3 -2
  271. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +4 -1
  272. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +0 -1
  273. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +126 -46
  274. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +31 -3
  275. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +1 -2
  276. package/deps/rocksdb/rocksdb/table/cleanable_test.cc +3 -1
  277. package/deps/rocksdb/rocksdb/table/external_table.cc +25 -4
  278. package/deps/rocksdb/rocksdb/table/format.cc +27 -15
  279. package/deps/rocksdb/rocksdb/table/format.h +41 -15
  280. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -0
  281. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +22 -12
  282. package/deps/rocksdb/rocksdb/table/meta_blocks.h +0 -1
  283. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +7 -21
  284. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
  285. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +88 -13
  286. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +53 -42
  287. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +3 -12
  288. package/deps/rocksdb/rocksdb/table/table_builder.h +0 -4
  289. package/deps/rocksdb/rocksdb/table/table_properties.cc +18 -0
  290. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -3
  291. package/deps/rocksdb/rocksdb/table/table_test.cc +848 -172
  292. package/deps/rocksdb/rocksdb/table/unique_id.cc +24 -20
  293. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +8 -8
  294. package/deps/rocksdb/rocksdb/test_util/sync_point.h +5 -4
  295. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -1
  296. package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -2
  297. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +2 -1
  298. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +238 -120
  299. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +2 -2
  300. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +2 -4
  301. package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +4 -8
  302. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +1 -1
  303. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +2 -3
  304. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +82 -20
  305. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +41 -47
  306. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -0
  307. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +5 -6
  308. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +1 -1
  309. package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +6 -5
  310. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +4 -4
  311. package/deps/rocksdb/rocksdb/tools/write_stress.cc +1 -3
  312. package/deps/rocksdb/rocksdb/util/atomic.h +30 -23
  313. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +6 -7
  314. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +3 -3
  315. package/deps/rocksdb/rocksdb/util/bit_fields.h +68 -46
  316. package/deps/rocksdb/rocksdb/util/bloom_impl.h +16 -16
  317. package/deps/rocksdb/rocksdb/util/coding.h +14 -27
  318. package/deps/rocksdb/rocksdb/util/compression.cc +365 -207
  319. package/deps/rocksdb/rocksdb/util/compression.h +16 -1298
  320. package/deps/rocksdb/rocksdb/util/compression_test.cc +347 -61
  321. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +8 -9
  322. package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +1 -1
  323. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +1 -1
  324. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +3 -3
  325. package/deps/rocksdb/rocksdb/util/filter_bench.cc +18 -18
  326. package/deps/rocksdb/rocksdb/util/gflags_compat.h +3 -3
  327. package/deps/rocksdb/rocksdb/util/hash_test.cc +19 -7
  328. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.cc +1099 -0
  329. package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.h +36 -0
  330. package/deps/rocksdb/rocksdb/util/io_dispatcher_test.cc +1919 -0
  331. package/deps/rocksdb/rocksdb/util/math.h +3 -1
  332. package/deps/rocksdb/rocksdb/util/mutexlock.h +19 -19
  333. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +25 -25
  334. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +5 -7
  335. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -5
  336. package/deps/rocksdb/rocksdb/util/slice.cc +0 -10
  337. package/deps/rocksdb/rocksdb/util/slice_test.cc +35 -1
  338. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +5 -7
  339. package/deps/rocksdb/rocksdb/util/status.cc +3 -1
  340. package/deps/rocksdb/rocksdb/util/stop_watch.h +2 -0
  341. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -1
  342. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +123 -78
  343. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +12 -93
  344. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -4
  345. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +0 -21
  346. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +6 -48
  347. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +94 -307
  348. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +12 -58
  349. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +2 -8
  350. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +2 -3
  351. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +205 -811
  352. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +18 -9
  353. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +2 -7
  354. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +1 -9
  355. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +17 -11
  356. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +1 -1
  357. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +1 -1
  358. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
  359. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +68 -61
  360. package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -1
  361. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +105 -59
  362. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -7
  363. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs_test.cc +94 -0
  364. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +13 -17
  365. package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +16 -3
  366. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +25 -25
  367. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +40 -40
  368. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +2 -5
  369. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +17 -19
  370. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +2 -2
  371. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +2 -2
  372. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +1 -1
  373. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +2 -2
  374. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +4 -13
  375. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +3 -3
  376. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
  377. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_seqno_test.cc +431 -0
  378. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -2
  379. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +91 -0
  380. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.cc +562 -0
  381. package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.h +615 -0
  382. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.cc +2575 -0
  383. package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.h +685 -0
  384. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_db_test.cc +2843 -0
  385. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.cc +567 -0
  386. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.h +275 -0
  387. package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_test.cc +5183 -0
  388. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +4 -3
  389. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  390. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
  391. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +3 -3
  392. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +93 -88
  393. package/deps/rocksdb/rocksdb.gyp +7 -0
  394. package/iterator.js +2 -2
  395. package/package.json +1 -1
  396. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  397. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  398. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +0 -43
  399. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +0 -55
@@ -234,8 +234,9 @@ class NonBatchedOpsStressTest : public StressTest {
234
234
 
235
235
  Status s = secondary_db_->TryCatchUpWithPrimary();
236
236
  #ifndef NDEBUG
237
- uint64_t manifest_num = static_cast_with_check<DBImpl>(secondary_db_)
238
- ->TEST_Current_Manifest_FileNo();
237
+ uint64_t manifest_num =
238
+ static_cast_with_check<DBImpl>(secondary_db_.get())
239
+ ->TEST_Current_Manifest_FileNo();
239
240
  #else
240
241
  uint64_t manifest_num = 0;
241
242
  #endif
@@ -542,7 +543,10 @@ class NonBatchedOpsStressTest : public StressTest {
542
543
  }
543
544
  std::unique_ptr<Iterator> iter(
544
545
  secondary_db_->NewIterator(read_opts, handle));
545
- uint32_t rnd = (thread->rand.Next()) % 4;
546
+ // Skip SeekToFirst, SeekToLast, SeekForPrev, and Prev when backward
547
+ // scan is disabled.
548
+ uint32_t rnd =
549
+ (!FLAGS_test_backward_scan) ? 2 : (thread->rand.Next()) % 4;
546
550
  if (0 == rnd) {
547
551
  // SeekToFirst() + Next()*5
548
552
  read_opts.total_order_seek = true;
@@ -1904,6 +1908,11 @@ class NonBatchedOpsStressTest : public StressTest {
1904
1908
  } while (!s.ok() && IsErrorInjectedAndRetryable(s) &&
1905
1909
  initial_wal_write_may_succeed);
1906
1910
 
1911
+ if (IsExpectedTxnError(s)) {
1912
+ pending_expected_value.Rollback();
1913
+ return Status::OK();
1914
+ }
1915
+
1907
1916
  if (!s.ok()) {
1908
1917
  pending_expected_value.Rollback();
1909
1918
  if (IsErrorInjectedAndRetryable(s)) {
@@ -2004,6 +2013,11 @@ class NonBatchedOpsStressTest : public StressTest {
2004
2013
  } while (!s.ok() && IsErrorInjectedAndRetryable(s) &&
2005
2014
  initial_wal_write_may_succeed);
2006
2015
 
2016
+ if (IsExpectedTxnError(s)) {
2017
+ pending_expected_value.Rollback();
2018
+ return Status::OK();
2019
+ }
2020
+
2007
2021
  if (!s.ok()) {
2008
2022
  pending_expected_value.Rollback();
2009
2023
  if (IsErrorInjectedAndRetryable(s)) {
@@ -2071,6 +2085,11 @@ class NonBatchedOpsStressTest : public StressTest {
2071
2085
  } while (!s.ok() && IsErrorInjectedAndRetryable(s) &&
2072
2086
  initial_wal_write_may_succeed);
2073
2087
 
2088
+ if (IsExpectedTxnError(s)) {
2089
+ pending_expected_value.Rollback();
2090
+ return Status::OK();
2091
+ }
2092
+
2074
2093
  if (!s.ok()) {
2075
2094
  pending_expected_value.Rollback();
2076
2095
  if (IsErrorInjectedAndRetryable(s)) {
@@ -2644,76 +2663,78 @@ class NonBatchedOpsStressTest : public StressTest {
2644
2663
  op_logs += "N";
2645
2664
  }
2646
2665
 
2647
- // backward scan
2648
- key_str = Key(ub - 1);
2649
- iter->SeekForPrev(key_str);
2666
+ // backward scan — skip when backward iteration is not supported
2667
+ if (FLAGS_test_backward_scan) {
2668
+ key_str = Key(ub - 1);
2669
+ iter->SeekForPrev(key_str);
2650
2670
 
2651
- op_logs += " SFP " + Slice(key_str).ToString(true) + " ";
2671
+ op_logs += " SFP " + Slice(key_str).ToString(true) + " ";
2652
2672
 
2653
- last_key = ub;
2654
- while (true) {
2655
- assert(lb < last_key);
2673
+ last_key = ub;
2674
+ while (true) {
2675
+ assert(lb < last_key);
2656
2676
 
2657
- if (iter->Valid() && ro.allow_unprepared_value) {
2658
- op_logs += "*";
2677
+ if (iter->Valid() && ro.allow_unprepared_value) {
2678
+ op_logs += "*";
2659
2679
 
2660
- if (!iter->PrepareValue()) {
2661
- assert(!iter->Valid());
2662
- assert(!iter->status().ok());
2680
+ if (!iter->PrepareValue()) {
2681
+ assert(!iter->Valid());
2682
+ assert(!iter->status().ok());
2683
+ }
2663
2684
  }
2664
- }
2665
2685
 
2666
- if (!iter->Valid()) {
2667
- if (!iter->status().ok()) {
2668
- if (IsErrorInjectedAndRetryable(iter->status())) {
2669
- return iter->status();
2670
- } else {
2671
- thread->shared->SetVerificationFailure();
2672
- fprintf(stderr, "TestIterate against expected state error: %s\n",
2673
- iter->status().ToString().c_str());
2674
- fprintf(stderr, "Column family: %s, op_logs: %s\n",
2675
- cfh->GetName().c_str(), op_logs.c_str());
2676
- thread->stats.AddErrors(1);
2677
- return iter->status();
2686
+ if (!iter->Valid()) {
2687
+ if (!iter->status().ok()) {
2688
+ if (IsErrorInjectedAndRetryable(iter->status())) {
2689
+ return iter->status();
2690
+ } else {
2691
+ thread->shared->SetVerificationFailure();
2692
+ fprintf(stderr, "TestIterate against expected state error: %s\n",
2693
+ iter->status().ToString().c_str());
2694
+ fprintf(stderr, "Column family: %s, op_logs: %s\n",
2695
+ cfh->GetName().c_str(), op_logs.c_str());
2696
+ thread->stats.AddErrors(1);
2697
+ return iter->status();
2698
+ }
2678
2699
  }
2700
+ if (!check_no_key_in_range(lb, last_key)) {
2701
+ return Status::OK();
2702
+ }
2703
+ break;
2679
2704
  }
2680
- if (!check_no_key_in_range(lb, last_key)) {
2705
+
2706
+ if (!check_columns()) {
2681
2707
  return Status::OK();
2682
2708
  }
2683
- break;
2684
- }
2685
-
2686
- if (!check_columns()) {
2687
- return Status::OK();
2688
- }
2689
2709
 
2690
- // the range (current key, last key) was skipped
2691
- GetIntVal(iter->key().ToString(), &curr);
2692
- if (last_key <= static_cast<int64_t>(curr)) {
2693
- thread->shared->SetVerificationFailure();
2694
- fprintf(stderr,
2695
- "TestIterateAgainstExpected failed: found unexpectedly large "
2696
- "key\n");
2697
- fprintf(stderr, "Column family: %s, op_logs: %s\n",
2698
- cfh->GetName().c_str(), op_logs.c_str());
2699
- fprintf(stderr, "Last op found key: %s, expected at most: %s\n",
2700
- Slice(Key(curr)).ToString(true).c_str(),
2701
- Slice(Key(last_key - 1)).ToString(true).c_str());
2702
- thread->stats.AddErrors(1);
2703
- return Status::OK();
2704
- }
2705
- if (!check_no_key_in_range(static_cast<int64_t>(curr + 1), last_key)) {
2706
- return Status::OK();
2707
- }
2710
+ // the range (current key, last key) was skipped
2711
+ GetIntVal(iter->key().ToString(), &curr);
2712
+ if (last_key <= static_cast<int64_t>(curr)) {
2713
+ thread->shared->SetVerificationFailure();
2714
+ fprintf(stderr,
2715
+ "TestIterateAgainstExpected failed: found unexpectedly large "
2716
+ "key\n");
2717
+ fprintf(stderr, "Column family: %s, op_logs: %s\n",
2718
+ cfh->GetName().c_str(), op_logs.c_str());
2719
+ fprintf(stderr, "Last op found key: %s, expected at most: %s\n",
2720
+ Slice(Key(curr)).ToString(true).c_str(),
2721
+ Slice(Key(last_key - 1)).ToString(true).c_str());
2722
+ thread->stats.AddErrors(1);
2723
+ return Status::OK();
2724
+ }
2725
+ if (!check_no_key_in_range(static_cast<int64_t>(curr + 1), last_key)) {
2726
+ return Status::OK();
2727
+ }
2708
2728
 
2709
- last_key = static_cast<int64_t>(curr);
2710
- if (last_key <= lb) {
2711
- break;
2712
- }
2729
+ last_key = static_cast<int64_t>(curr);
2730
+ if (last_key <= lb) {
2731
+ break;
2732
+ }
2713
2733
 
2714
- iter->Prev();
2734
+ iter->Prev();
2715
2735
 
2716
- op_logs += "P";
2736
+ op_logs += "P";
2737
+ }
2717
2738
  }
2718
2739
 
2719
2740
  // Write-prepared/write-unprepared transactions and multi-CF iterator do not
@@ -2755,7 +2776,8 @@ class NonBatchedOpsStressTest : public StressTest {
2755
2776
  key_str = Key(mid);
2756
2777
  const Slice key(key_str);
2757
2778
 
2758
- if (thread->rand.OneIn(2)) {
2779
+ // Skip SeekForPrev and Prev when backward scan is not supported.
2780
+ if (!FLAGS_test_backward_scan || thread->rand.OneIn(2)) {
2759
2781
  iter->Seek(key);
2760
2782
  op_logs += " S " + key.ToString(true) + " ";
2761
2783
  if (!iter->Valid() && iter->status().ok()) {
@@ -2824,8 +2846,14 @@ class NonBatchedOpsStressTest : public StressTest {
2824
2846
  iter->Next();
2825
2847
  op_logs += "N";
2826
2848
  } else if (static_cast<int64_t>(curr) >= ub) {
2827
- iter->Prev();
2828
- op_logs += "P";
2849
+ // Use Next when backward scan is not supported.
2850
+ if (!FLAGS_test_backward_scan) {
2851
+ iter->Next();
2852
+ op_logs += "N";
2853
+ } else {
2854
+ iter->Prev();
2855
+ op_logs += "P";
2856
+ }
2829
2857
  } else {
2830
2858
  const uint32_t value_base_from_db = GetValueBase(iter->value());
2831
2859
  std::size_t index = static_cast<std::size_t>(curr - lb);
@@ -2852,7 +2880,7 @@ class NonBatchedOpsStressTest : public StressTest {
2852
2880
  break;
2853
2881
  }
2854
2882
 
2855
- if (thread->rand.OneIn(2)) {
2883
+ if (!FLAGS_test_backward_scan || thread->rand.OneIn(2)) {
2856
2884
  iter->Next();
2857
2885
  op_logs += "N";
2858
2886
  if (!iter->Valid()) {
@@ -3197,15 +3225,7 @@ class NonBatchedOpsStressTest : public StressTest {
3197
3225
  assert(false);
3198
3226
  }
3199
3227
 
3200
- // It is possible that multiple thread concurrently try to write to the
3201
- // same key, which could cause lock timeout or deadlock in the
3202
- // transactiondb layer, before transaction is rolled back.
3203
- // E.g.
3204
- // Timestamp 1: Transaction A: lock key M for write
3205
- // Timestamp 2: Transaction B: lock key N for write
3206
- // Timestamp 3: Transaction B: try to lock key M for write -> wait
3207
- // Timestamp 4: Transaction A: try to lock key N for write -> deadlock
3208
- if (s.IsTimedOut() || s.IsDeadlock()) {
3228
+ if (IsExpectedTxnError(s)) {
3209
3229
  return;
3210
3230
  }
3211
3231
 
@@ -27,6 +27,7 @@
27
27
  #include "rocksdb/utilities/object_registry.h"
28
28
  #include "rocksdb/utilities/options_type.h"
29
29
  #include "util/autovector.h"
30
+ #include "util/string_util.h"
30
31
 
31
32
  namespace ROCKSDB_NAMESPACE {
32
33
  namespace {
@@ -41,6 +41,9 @@
41
41
  #include "env/env_chroot.h"
42
42
  #include "env/env_encryption_ctr.h"
43
43
  #include "env/fs_readonly.h"
44
+ #if defined(ROCKSDB_IOURING_PRESENT)
45
+ #include "env/io_posix.h"
46
+ #endif
44
47
  #include "env/mock_env.h"
45
48
  #include "env/unique_id_gen.h"
46
49
  #include "logging/log_buffer.h"
@@ -2505,7 +2508,7 @@ TEST_P(EnvFSTestWithParam, OptionsTest) {
2505
2508
  }
2506
2509
  }
2507
2510
  for (int i = 0; i < 2; ++i) {
2508
- DB* db;
2511
+ std::unique_ptr<DB> db;
2509
2512
  Status s = DB::Open(opts, dbname, &db);
2510
2513
  ASSERT_OK(s);
2511
2514
 
@@ -2523,7 +2526,7 @@ TEST_P(EnvFSTestWithParam, OptionsTest) {
2523
2526
  ASSERT_EQ("b", val);
2524
2527
 
2525
2528
  ASSERT_OK(db->Close());
2526
- delete db;
2529
+ db.reset();
2527
2530
  ASSERT_OK(DestroyDB(dbname, opts));
2528
2531
 
2529
2532
  dbname = dbname2_;
@@ -3725,6 +3728,366 @@ TEST_F(TestAsyncRead, InterleavingIOUringOperations) {
3725
3728
  #endif
3726
3729
  }
3727
3730
 
3731
+ // Helper function to run AbortIO test with parameterized read requests.
3732
+ // Each request is specified as {offset, length}.
3733
+ // use_direct_io: if true, opens the file with O_DIRECT to bypass page cache.
3734
+ // iterations: number of times to repeat the test (useful for race conditions).
3735
+ void TestAbortIOWithRequests(
3736
+ Env* env, size_t file_size,
3737
+ const std::vector<std::pair<uint64_t, size_t>>& read_specs,
3738
+ bool use_direct_io = false, int iterations = 1) {
3739
+ #if defined(ROCKSDB_IOURING_PRESENT)
3740
+ fprintf(stderr,
3741
+ "TestAbortIOWithRequests: file_size=%zu, num_reads=%zu, "
3742
+ "direct_io=%d, iterations=%d\n",
3743
+ file_size, read_specs.size(), use_direct_io, iterations);
3744
+ std::shared_ptr<FileSystem> fs = env->GetFileSystem();
3745
+ std::string fname = test::PerThreadDBPath(env, "testfile_abortio");
3746
+
3747
+ // 1. Create test file once (content doesn't change between iterations)
3748
+ {
3749
+ std::unique_ptr<FSWritableFile> wfile;
3750
+ FileOptions file_opts;
3751
+ file_opts.use_direct_writes = true;
3752
+ ASSERT_OK(fs->NewWritableFile(fname, file_opts, &wfile, nullptr));
3753
+
3754
+ // Query the file's required buffer alignment (logical block size)
3755
+ // instead of hardcoding 4096, to support devices with different
3756
+ // sector sizes.
3757
+ size_t sector_size = wfile->GetRequiredBufferAlignment();
3758
+
3759
+ // Round up to full sectors for direct IO writes
3760
+ size_t num_sectors = (file_size + sector_size - 1) / sector_size;
3761
+ for (size_t i = 0; i < num_sectors; ++i) {
3762
+ auto data = NewAligned(sector_size, static_cast<char>(i + 1));
3763
+ Slice slice(data.get(), sector_size);
3764
+ ASSERT_OK(wfile->Append(slice, IOOptions(), nullptr));
3765
+ }
3766
+
3767
+ // Truncate to exact file size if not aligned to sector boundary
3768
+ if (file_size % sector_size != 0) {
3769
+ ASSERT_OK(wfile->Truncate(file_size, IOOptions(), nullptr));
3770
+ }
3771
+
3772
+ ASSERT_OK(wfile->Close(IOOptions(), nullptr));
3773
+ }
3774
+
3775
+ for (int iter = 0; iter < iterations; iter++) {
3776
+ // 2. Submit ReadAsync requests and immediately abort
3777
+ {
3778
+ FileOptions file_opts;
3779
+ file_opts.use_direct_reads = use_direct_io;
3780
+ std::unique_ptr<FSRandomAccessFile> file;
3781
+ ASSERT_OK(fs->NewRandomAccessFile(fname, file_opts, &file, nullptr));
3782
+
3783
+ const size_t num_reads = read_specs.size();
3784
+ IOOptions opts;
3785
+ std::vector<void*> io_handles(num_reads);
3786
+ std::vector<FSReadRequest> reqs(num_reads);
3787
+ std::vector<std::unique_ptr<char, Deleter>> data;
3788
+ std::vector<size_t> vals;
3789
+ IOHandleDeleter del_fn;
3790
+ std::atomic<int> callbacks_invoked{0};
3791
+
3792
+ // Initialize read requests from specs
3793
+ for (size_t i = 0; i < num_reads; i++) {
3794
+ reqs[i].offset = read_specs[i].first;
3795
+ reqs[i].len = read_specs[i].second;
3796
+ data.emplace_back(NewAligned(reqs[i].len, 0));
3797
+ reqs[i].scratch = data.back().get();
3798
+ vals.push_back(i);
3799
+ }
3800
+
3801
+ // Callback
3802
+ std::function<void(FSReadRequest&, void*)> callback =
3803
+ [&](FSReadRequest& req, void* cb_arg) {
3804
+ size_t i = *(reinterpret_cast<size_t*>(cb_arg));
3805
+ reqs[i].status = req.status;
3806
+ callbacks_invoked++;
3807
+ };
3808
+
3809
+ // Submit all ReadAsync requests
3810
+ for (size_t i = 0; i < num_reads; i++) {
3811
+ void* cb_arg = static_cast<void*>(&(vals[i]));
3812
+ IOStatus s = file->ReadAsync(reqs[i], opts, callback, cb_arg,
3813
+ &(io_handles[i]), &del_fn, nullptr);
3814
+ if (s.IsNotSupported()) {
3815
+ // io_uring not supported, clean up and skip
3816
+ fprintf(stderr,
3817
+ "WARNING: io_uring not supported, skipping test: %s\n",
3818
+ s.ToString().c_str());
3819
+ for (size_t j = 0; j < i; j++) {
3820
+ if (io_handles[j]) {
3821
+ del_fn(io_handles[j]);
3822
+ }
3823
+ }
3824
+ ASSERT_OK(fs->DeleteFile(fname, IOOptions(), nullptr));
3825
+ return;
3826
+ }
3827
+ ASSERT_OK(s);
3828
+ }
3829
+
3830
+ // Immediately call AbortIO - this should NOT hang
3831
+ ASSERT_OK(fs->AbortIO(io_handles));
3832
+
3833
+ // Verify all handles are finished and all callbacks were invoked.
3834
+ // Since all handles are passed to AbortIO, every handle is guaranteed
3835
+ // to be finalized (either completed or cancelled).
3836
+ for (size_t i = 0; i < num_reads; i++) {
3837
+ Posix_IOHandle* h = static_cast<Posix_IOHandle*>(io_handles[i]);
3838
+ ASSERT_TRUE(h->is_finished);
3839
+ }
3840
+ ASSERT_EQ(callbacks_invoked.load(), static_cast<int>(num_reads));
3841
+
3842
+ // Clean up handles
3843
+ for (size_t i = 0; i < num_reads; i++) {
3844
+ if (io_handles[i]) {
3845
+ del_fn(io_handles[i]);
3846
+ }
3847
+ }
3848
+ }
3849
+ }
3850
+
3851
+ ASSERT_OK(fs->DeleteFile(fname, IOOptions(), nullptr));
3852
+
3853
+ fprintf(stderr, "TestAbortIOWithRequests: completed %d iterations\n",
3854
+ iterations);
3855
+ #else
3856
+ fprintf(stderr,
3857
+ "TestAbortIOWithRequests: SKIPPED (ROCKSDB_IOURING_PRESENT not "
3858
+ "defined)\n");
3859
+ (void)env;
3860
+ (void)file_size;
3861
+ (void)read_specs;
3862
+ (void)use_direct_io;
3863
+ (void)iterations;
3864
+ #endif
3865
+ }
3866
+
3867
+ // Test overlapping reads at aligned offsets (multiples of 4KB)
3868
+ TEST_F(TestAsyncRead, AbortIOOverlappingAligned) {
3869
+ // 4 reads of 16KB each, overlapping by 8KB, all at 4KB-aligned offsets
3870
+ // Read 0: [0, 16KB), Read 1: [8KB, 24KB), Read 2: [16KB, 32KB), Read 3:
3871
+ // [24KB, 40KB)
3872
+ std::vector<std::pair<uint64_t, size_t>> specs = {
3873
+ {0, 16384},
3874
+ {8192, 16384},
3875
+ {16384, 16384},
3876
+ {24576, 16384},
3877
+ };
3878
+ TestAbortIOWithRequests(env_, 64 * 1024, specs);
3879
+ }
3880
+
3881
+ // Test reads at unaligned offsets (not multiples of 4KB)
3882
+ TEST_F(TestAsyncRead, AbortIOUnalignedOffsets) {
3883
+ // Reads starting at non-4KB-aligned offsets
3884
+ std::vector<std::pair<uint64_t, size_t>> specs = {
3885
+ {1000, 8192}, // starts at 1000 (unaligned)
3886
+ {5000, 12288}, // starts at 5000 (unaligned), spans multiple sectors
3887
+ {15000, 8192}, // starts at 15000 (unaligned)
3888
+ {25500, 16384}, // starts at 25500 (unaligned)
3889
+ };
3890
+ TestAbortIOWithRequests(env_, 64 * 1024, specs);
3891
+ }
3892
+
3893
+ // Test mix of aligned and unaligned, various sizes
3894
+ TEST_F(TestAsyncRead, AbortIOMixedOffsets) {
3895
+ std::vector<std::pair<uint64_t, size_t>> specs = {
3896
+ {0, 4096}, // aligned, 1 sector
3897
+ {1500, 8192}, // unaligned, 2 sectors
3898
+ {4096, 20480}, // aligned, 5 sectors
3899
+ {7000, 4096}, // unaligned, spans 2 sectors
3900
+ {16384, 32768}, // aligned, 8 sectors
3901
+ {50000, 8192}, // unaligned
3902
+ };
3903
+ TestAbortIOWithRequests(env_, 128 * 1024, specs);
3904
+ }
3905
+
3906
+ // Stress test with many concurrent handles
3907
+ TEST_F(TestAsyncRead, AbortIOStress) {
3908
+ std::vector<std::pair<uint64_t, size_t>> specs;
3909
+ // 16 overlapping reads with mixed alignment
3910
+ for (int i = 0; i < 16; i++) {
3911
+ uint64_t offset = i * 4000; // Not aligned to 4KB
3912
+ size_t len = 8192 + (i % 4) * 4096; // 8KB to 20KB
3913
+ specs.emplace_back(offset, len);
3914
+ }
3915
+ TestAbortIOWithRequests(env_, 256 * 1024, specs);
3916
+ }
3917
+
3918
+ // Regression test for a fixed bug in AbortIO where out-of-order io_uring
3919
+ // completions could cause an infinite hang. The bug occurred when completions
3920
+ // for a different handle arrived while waiting for the current handle - the
3921
+ // code would consume those completions but not mark the handle as finished,
3922
+ // causing a hang when later iterating to that handle.
3923
+ //
3924
+ // Uses a large read (1MB) followed by a small read (4KB) with Direct I/O to
3925
+ // maximize the chance of out-of-order completions. Runs 100 iterations to
3926
+ // increase the likelihood of triggering the race condition.
3927
+ TEST_F(TestAsyncRead, AbortIOReversedHandles) {
3928
+ // Request 0: LARGE (1MB) at offset 0
3929
+ // Request 1: SMALL (4KB) at offset 1MB
3930
+ std::vector<std::pair<uint64_t, size_t>> specs = {
3931
+ {0, 1024 * 1024}, // 1MB read
3932
+ {1024 * 1024, 4096}, // 4KB read at 1MB offset
3933
+ };
3934
+ // 2MB file, Direct I/O enabled, 100 iterations
3935
+ TestAbortIOWithRequests(env_, 2 * 1024 * 1024, specs,
3936
+ /*use_direct_io=*/true, /*iterations=*/100);
3937
+ }
3938
+
3939
+ // Test for bug fix: AbortIO with partial handles should correctly handle
3940
+ // completions for non-aborted handles.
3941
+ //
3942
+ // Previously, AbortIO would consume completions for non-aborted handles but
3943
+ // not set is_finished (since it expected req_count==2 for all handles).
3944
+ // This caused subsequent Poll calls to hang forever.
3945
+ //
3946
+ // The fix correctly detects handles not in the abort set and finalizes them
3947
+ // immediately when their completion arrives (at req_count==1).
3948
+ TEST_F(TestAsyncRead, AbortIOPartialHandlesBug) {
3949
+ #if defined(ROCKSDB_IOURING_PRESENT)
3950
+ std::shared_ptr<FileSystem> fs = env_->GetFileSystem();
3951
+ std::string fname = test::PerThreadDBPath(env_, "testfile_abortio_partial");
3952
+
3953
+ constexpr size_t kSectorSize = 4096;
3954
+ constexpr size_t kFileSize = 2 * 1024 * 1024; // 2MB
3955
+
3956
+ // 1. Create test file with direct I/O
3957
+ {
3958
+ std::unique_ptr<FSWritableFile> wfile;
3959
+ FileOptions file_opts;
3960
+ file_opts.use_direct_writes = true;
3961
+ ASSERT_OK(fs->NewWritableFile(fname, file_opts, &wfile, nullptr));
3962
+
3963
+ size_t num_sectors = kFileSize / kSectorSize;
3964
+ for (size_t i = 0; i < num_sectors; ++i) {
3965
+ auto data = NewAligned(kSectorSize, static_cast<char>(i + 1));
3966
+ Slice slice(data.get(), kSectorSize);
3967
+ ASSERT_OK(wfile->Append(slice, IOOptions(), nullptr));
3968
+ }
3969
+ ASSERT_OK(wfile->Close(IOOptions(), nullptr));
3970
+ }
3971
+
3972
+ // 2. Submit 3 ReadAsync requests, abort only the first one, then Poll the
3973
+ // rest
3974
+ {
3975
+ FileOptions file_opts;
3976
+ file_opts.use_direct_reads = true;
3977
+ std::unique_ptr<FSRandomAccessFile> file;
3978
+ ASSERT_OK(fs->NewRandomAccessFile(fname, file_opts, &file, nullptr));
3979
+
3980
+ IOOptions opts;
3981
+ constexpr size_t kNumReads = 3;
3982
+ std::vector<void*> io_handles(kNumReads);
3983
+ std::vector<FSReadRequest> reqs(kNumReads);
3984
+ std::vector<std::unique_ptr<char, Deleter>> data;
3985
+ std::vector<size_t> vals;
3986
+ IOHandleDeleter del_fn;
3987
+ std::atomic<int> callbacks_invoked{0};
3988
+
3989
+ // H0: 1MB read, H1: 4KB read, H2: 4KB read
3990
+ std::vector<std::pair<uint64_t, size_t>> read_specs = {
3991
+ {0, 1024 * 1024}, // H0: 1MB at offset 0
3992
+ {1024 * 1024, 4096}, // H1: 4KB at offset 1MB
3993
+ {1024 * 1024 + 4096, 4096}, // H2: 4KB at offset 1MB+4KB
3994
+ };
3995
+
3996
+ for (size_t i = 0; i < kNumReads; i++) {
3997
+ reqs[i].offset = read_specs[i].first;
3998
+ reqs[i].len = read_specs[i].second;
3999
+ data.emplace_back(NewAligned(reqs[i].len, 0));
4000
+ reqs[i].scratch = data.back().get();
4001
+ vals.push_back(i);
4002
+ }
4003
+
4004
+ std::function<void(FSReadRequest&, void*)> callback =
4005
+ [&](FSReadRequest& req, void* cb_arg) {
4006
+ size_t i = *(reinterpret_cast<size_t*>(cb_arg));
4007
+ reqs[i].status = req.status;
4008
+ callbacks_invoked++;
4009
+ };
4010
+
4011
+ // Submit all ReadAsync requests
4012
+ for (size_t i = 0; i < kNumReads; i++) {
4013
+ void* cb_arg = static_cast<void*>(&(vals[i]));
4014
+ IOStatus s = file->ReadAsync(reqs[i], opts, callback, cb_arg,
4015
+ &(io_handles[i]), &del_fn, nullptr);
4016
+ if (s.IsNotSupported()) {
4017
+ // io_uring not supported, clean up and skip
4018
+ for (size_t j = 0; j < i; j++) {
4019
+ if (io_handles[j]) {
4020
+ del_fn(io_handles[j]);
4021
+ }
4022
+ }
4023
+ ASSERT_OK(fs->DeleteFile(fname, IOOptions(), nullptr));
4024
+ return;
4025
+ }
4026
+ ASSERT_OK(s);
4027
+ }
4028
+
4029
+ // Wait for reads to complete in io_uring (completions in queue but not
4030
+ // consumed). 5 seconds should be plenty for direct I/O reads to complete.
4031
+ std::this_thread::sleep_for(std::chrono::seconds(5));
4032
+
4033
+ // Abort ONLY H0 - this will consume all completions but should correctly
4034
+ // finalize H1 and H2 (since they're not in the abort set).
4035
+ std::vector<void*> abort_handles = {io_handles[0]};
4036
+ ASSERT_OK(fs->AbortIO(abort_handles));
4037
+
4038
+ // Verify H0 is finished (aborted)
4039
+ Posix_IOHandle* h0 = static_cast<Posix_IOHandle*>(io_handles[0]);
4040
+ ASSERT_TRUE(h0->is_finished);
4041
+ ASSERT_EQ(h0->req_count, 2u); // original + cancel
4042
+
4043
+ // Note: H1 and H2 may or may not be finished at this point. AbortIO
4044
+ // finalizes non-aborted handles whose CQEs arrive while waiting for
4045
+ // aborted handles, but CQE ordering is non-deterministic. If H0's
4046
+ // completions arrived first, H1/H2's CQEs are still in the queue.
4047
+ // Poll handles either case correctly.
4048
+
4049
+ // Poll on H1, H2 - completes them if not already finalized by AbortIO
4050
+ std::vector<void*> poll_handles = {io_handles[1], io_handles[2]};
4051
+
4052
+ // Use a watchdog to detect hang (regression test for the original bug
4053
+ // where AbortIO consumed non-aborted CQEs without finalizing them)
4054
+ std::atomic<bool> poll_completed{false};
4055
+ std::thread watchdog([&]() {
4056
+ for (int i = 0; i < 500; i++) { // 5 seconds timeout
4057
+ std::this_thread::sleep_for(std::chrono::milliseconds(10));
4058
+ if (poll_completed) return;
4059
+ }
4060
+ // Bug regression: Poll hung
4061
+ _exit(1);
4062
+ });
4063
+
4064
+ fs->Poll(poll_handles, poll_handles.size());
4065
+ poll_completed = true;
4066
+ watchdog.join();
4067
+
4068
+ // After Poll, H1 and H2 must be finished
4069
+ Posix_IOHandle* h1 = static_cast<Posix_IOHandle*>(io_handles[1]);
4070
+ Posix_IOHandle* h2 = static_cast<Posix_IOHandle*>(io_handles[2]);
4071
+ ASSERT_TRUE(h1->is_finished);
4072
+ ASSERT_TRUE(h2->is_finished);
4073
+
4074
+ // Verify all callbacks were invoked
4075
+ ASSERT_EQ(callbacks_invoked.load(), 3);
4076
+
4077
+ // Clean up handles
4078
+ for (size_t i = 0; i < kNumReads; i++) {
4079
+ if (io_handles[i]) {
4080
+ del_fn(io_handles[i]);
4081
+ }
4082
+ }
4083
+ }
4084
+
4085
+ ASSERT_OK(fs->DeleteFile(fname, IOOptions(), nullptr));
4086
+ #else
4087
+ (void)env_; // Suppress unused variable warning
4088
+ #endif
4089
+ }
4090
+
3728
4091
  struct StaticDestructionTester {
3729
4092
  bool activated = false;
3730
4093
  ~StaticDestructionTester() {