@nxtedition/rocksdb 9.0.0 → 9.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (304)
  1. package/binding.cc +0 -21
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +13 -9
  3. package/deps/rocksdb/rocksdb/Makefile +15 -6
  4. package/deps/rocksdb/rocksdb/README.md +29 -0
  5. package/deps/rocksdb/rocksdb/TARGETS +17 -2
  6. package/deps/rocksdb/rocksdb/cache/cache.cc +35 -0
  7. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +74 -15
  8. package/deps/rocksdb/rocksdb/cache/cache_helpers.cc +2 -1
  9. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +4 -3
  10. package/deps/rocksdb/rocksdb/cache/cache_test.cc +16 -4
  11. package/deps/rocksdb/rocksdb/cache/charged_cache.cc +4 -2
  12. package/deps/rocksdb/rocksdb/cache/charged_cache.h +5 -3
  13. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +2024 -14
  14. package/deps/rocksdb/rocksdb/cache/clock_cache.h +349 -23
  15. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +126 -51
  16. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +9 -0
  17. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +182 -7
  18. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +31 -14
  19. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +0 -33
  20. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +293 -17
  21. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +21 -5
  22. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +10 -0
  23. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +8 -3
  24. package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache.cc +119 -0
  25. package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache.h +155 -0
  26. package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache_test.cc +711 -0
  27. package/deps/rocksdb/rocksdb/cache/typed_cache.h +17 -11
  28. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +25 -11
  29. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +1 -0
  30. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +2 -1
  31. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +2 -1
  32. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +8 -0
  33. package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +7 -3
  34. package/deps/rocksdb/rocksdb/db/builder.cc +3 -3
  35. package/deps/rocksdb/rocksdb/db/c.cc +64 -0
  36. package/deps/rocksdb/rocksdb/db/c_test.c +36 -0
  37. package/deps/rocksdb/rocksdb/db/column_family.cc +23 -15
  38. package/deps/rocksdb/rocksdb/db/column_family.h +9 -0
  39. package/deps/rocksdb/rocksdb/db/column_family_test.cc +101 -5
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +36 -23
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +24 -10
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +3 -5
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +42 -18
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +7 -3
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +8 -6
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +1 -1
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +3 -0
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +61 -0
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +146 -64
  51. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +13 -39
  52. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +1 -0
  53. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +29 -7
  54. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +8 -3
  55. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +59 -0
  56. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +27 -3
  57. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +186 -2
  58. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +1 -0
  59. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +17 -5
  60. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +519 -240
  61. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +104 -43
  62. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +169 -66
  63. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +2 -1
  64. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +12 -4
  65. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +50 -14
  66. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +85 -53
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +3 -7
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +99 -82
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +4 -14
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +24 -21
  71. package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +6 -0
  72. package/deps/rocksdb/rocksdb/db/db_iter.cc +83 -55
  73. package/deps/rocksdb/rocksdb/db/db_iter.h +10 -2
  74. package/deps/rocksdb/rocksdb/db/db_iter_test.cc +29 -0
  75. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +276 -21
  76. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +35 -0
  77. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +187 -1
  78. package/deps/rocksdb/rocksdb/db/db_options_test.cc +258 -0
  79. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +258 -0
  80. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +1 -0
  81. package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +52 -0
  82. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +74 -1
  83. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +22 -4
  84. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +3 -1
  85. package/deps/rocksdb/rocksdb/db/db_test.cc +134 -30
  86. package/deps/rocksdb/rocksdb/db/db_test2.cc +3 -0
  87. package/deps/rocksdb/rocksdb/db/db_test_util.cc +11 -6
  88. package/deps/rocksdb/rocksdb/db/db_test_util.h +5 -2
  89. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +1 -0
  90. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +12 -0
  91. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +337 -1
  92. package/deps/rocksdb/rocksdb/db/deletefile_test.cc +2 -0
  93. package/deps/rocksdb/rocksdb/db/error_handler.cc +51 -34
  94. package/deps/rocksdb/rocksdb/db/error_handler.h +7 -6
  95. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +58 -0
  96. package/deps/rocksdb/rocksdb/db/flush_job.cc +17 -19
  97. package/deps/rocksdb/rocksdb/db/flush_job.h +3 -3
  98. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +2 -1
  99. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +2 -0
  100. package/deps/rocksdb/rocksdb/db/memtable.cc +18 -70
  101. package/deps/rocksdb/rocksdb/db/memtable_list.cc +1 -1
  102. package/deps/rocksdb/rocksdb/db/memtable_list.h +11 -1
  103. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +1 -1
  104. package/deps/rocksdb/rocksdb/db/merge_helper.cc +330 -115
  105. package/deps/rocksdb/rocksdb/db/merge_helper.h +100 -12
  106. package/deps/rocksdb/rocksdb/db/merge_operator.cc +82 -0
  107. package/deps/rocksdb/rocksdb/db/merge_test.cc +267 -0
  108. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +3 -0
  109. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.h +4 -4
  110. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +2 -0
  111. package/deps/rocksdb/rocksdb/db/prefix_test.cc +1 -0
  112. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +4 -0
  113. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +4 -0
  114. package/deps/rocksdb/rocksdb/db/repair.cc +4 -3
  115. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +454 -70
  116. package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.cc +105 -69
  117. package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.h +83 -46
  118. package/deps/rocksdb/rocksdb/db/table_cache.cc +32 -19
  119. package/deps/rocksdb/rocksdb/db/table_cache.h +12 -6
  120. package/deps/rocksdb/rocksdb/db/version_edit.h +10 -4
  121. package/deps/rocksdb/rocksdb/db/version_set.cc +75 -73
  122. package/deps/rocksdb/rocksdb/db/version_set.h +8 -8
  123. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +2 -5
  124. package/deps/rocksdb/rocksdb/db/version_set_test.cc +22 -11
  125. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +525 -0
  126. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +6 -22
  127. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +0 -20
  128. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +0 -29
  129. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +46 -0
  130. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.h +40 -0
  131. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper_test.cc +39 -0
  132. package/deps/rocksdb/rocksdb/db/write_batch.cc +44 -20
  133. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
  134. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +4 -4
  135. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +4 -7
  136. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +88 -10
  137. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +15 -10
  138. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +108 -58
  139. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +36 -14
  140. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +34 -0
  141. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +1 -1
  142. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +195 -130
  143. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +4 -2
  144. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +12 -12
  145. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_wide_merge_operator.cc +51 -0
  146. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_wide_merge_operator.h +27 -0
  147. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +3 -6
  148. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +14 -11
  149. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +44 -38
  150. package/deps/rocksdb/rocksdb/env/env.cc +5 -0
  151. package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +1 -0
  152. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +50 -29
  153. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +32 -2
  154. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +513 -30
  155. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +8 -0
  156. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +38 -13
  157. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +14 -7
  158. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +42 -0
  159. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +65 -12
  160. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +11 -0
  161. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +26 -0
  162. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +37 -4
  163. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +2 -0
  164. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -0
  165. package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +8 -3
  166. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +10 -4
  167. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +4 -0
  168. package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +1 -1
  169. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +55 -4
  170. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +45 -5
  171. package/deps/rocksdb/rocksdb/include/rocksdb/port_defs.h +4 -0
  172. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +9 -0
  173. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +79 -8
  174. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +16 -0
  175. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +35 -0
  176. package/deps/rocksdb/rocksdb/include/rocksdb/system_clock.h +15 -0
  177. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +14 -3
  178. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +2 -0
  179. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +7 -0
  180. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +6 -1
  181. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +2 -1
  182. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +9 -0
  183. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  184. package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +53 -2
  185. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +0 -2
  186. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +2 -2
  187. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +1 -1
  188. package/deps/rocksdb/rocksdb/microbench/README.md +60 -0
  189. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +1 -1
  190. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +6 -0
  191. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +18 -7
  192. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +4 -0
  193. package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -0
  194. package/deps/rocksdb/rocksdb/options/db_options.cc +47 -2
  195. package/deps/rocksdb/rocksdb/options/db_options.h +3 -0
  196. package/deps/rocksdb/rocksdb/options/options_helper.cc +12 -0
  197. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +3 -1
  198. package/deps/rocksdb/rocksdb/options/options_test.cc +6 -1
  199. package/deps/rocksdb/rocksdb/plugin/README.md +43 -0
  200. package/deps/rocksdb/rocksdb/port/README +10 -0
  201. package/deps/rocksdb/rocksdb/port/port_example.h +1 -1
  202. package/deps/rocksdb/rocksdb/port/port_posix.cc +1 -1
  203. package/deps/rocksdb/rocksdb/port/port_posix.h +7 -4
  204. package/deps/rocksdb/rocksdb/port/stack_trace.cc +5 -0
  205. package/deps/rocksdb/rocksdb/port/win/port_win.h +5 -2
  206. package/deps/rocksdb/rocksdb/src.mk +7 -1
  207. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1 -1
  208. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +3 -1
  209. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +275 -61
  210. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +96 -4
  211. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +179 -62
  212. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +35 -22
  213. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +12 -8
  214. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +14 -9
  215. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +3 -1
  216. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +26 -7
  217. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +15 -12
  218. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +10 -5
  219. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +39 -18
  220. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -6
  221. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +44 -26
  222. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +2 -1
  223. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
  224. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +10 -8
  225. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -2
  226. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +3 -2
  227. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +1 -1
  228. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +3 -2
  229. package/deps/rocksdb/rocksdb/table/block_fetcher.h +4 -0
  230. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +6 -2
  231. package/deps/rocksdb/rocksdb/table/get_context.cc +52 -89
  232. package/deps/rocksdb/rocksdb/table/get_context.h +12 -3
  233. package/deps/rocksdb/rocksdb/table/internal_iterator.h +11 -0
  234. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +29 -1
  235. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +12 -0
  236. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +33 -6
  237. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +1 -0
  238. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +2 -4
  239. package/deps/rocksdb/rocksdb/table/table_reader.h +6 -0
  240. package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +31 -0
  241. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.cc +2 -1
  242. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +3 -3
  243. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +26 -43
  244. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +213 -28
  245. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +36 -0
  246. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +0 -1
  247. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +33 -10
  248. package/deps/rocksdb/rocksdb/util/bloom_test.cc +32 -11
  249. package/deps/rocksdb/rocksdb/util/cast_util.h +10 -0
  250. package/deps/rocksdb/rocksdb/util/comparator.cc +26 -1
  251. package/deps/rocksdb/rocksdb/util/compression.h +9 -3
  252. package/deps/rocksdb/rocksdb/util/crc32c.cc +7 -1
  253. package/deps/rocksdb/rocksdb/util/distributed_mutex.h +1 -1
  254. package/deps/rocksdb/rocksdb/util/overload.h +23 -0
  255. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +53 -18
  256. package/deps/rocksdb/rocksdb/util/rate_limiter_impl.h +6 -1
  257. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +90 -19
  258. package/deps/rocksdb/rocksdb/util/slice_test.cc +30 -0
  259. package/deps/rocksdb/rocksdb/util/status.cc +1 -0
  260. package/deps/rocksdb/rocksdb/util/string_util.cc +39 -0
  261. package/deps/rocksdb/rocksdb/util/string_util.h +10 -0
  262. package/deps/rocksdb/rocksdb/util/thread_operation.h +2 -0
  263. package/deps/rocksdb/rocksdb/util/udt_util.cc +42 -0
  264. package/deps/rocksdb/rocksdb/util/udt_util.h +19 -0
  265. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +14 -0
  266. package/deps/rocksdb/rocksdb/util/xxhash.h +0 -3
  267. package/deps/rocksdb/rocksdb/util/xxph3.h +0 -4
  268. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +2 -1
  269. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +1 -0
  270. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +19 -15
  271. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +11 -7
  272. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +5 -0
  273. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +3 -0
  274. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +9 -0
  275. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +7 -4
  276. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +13 -0
  277. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +41 -0
  278. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +15 -9
  279. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +4 -0
  280. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +155 -0
  281. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
  282. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +81 -1
  283. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +2 -6
  284. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +7 -5
  285. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +2 -1
  286. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +3 -2
  287. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +57 -27
  288. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +127 -120
  289. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +129 -59
  290. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +105 -8
  291. package/deps/rocksdb/rocksdb.gyp +4 -2
  292. package/index.js +0 -8
  293. package/package.json +1 -1
  294. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  295. package/deps/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake +0 -7
  296. package/deps/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake +0 -29
  297. package/deps/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake +0 -29
  298. package/deps/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake +0 -29
  299. package/deps/rocksdb/rocksdb/cmake/modules/FindTBB.cmake +0 -33
  300. package/deps/rocksdb/rocksdb/cmake/modules/Findgflags.cmake +0 -29
  301. package/deps/rocksdb/rocksdb/cmake/modules/Findlz4.cmake +0 -29
  302. package/deps/rocksdb/rocksdb/cmake/modules/Finduring.cmake +0 -26
  303. package/deps/rocksdb/rocksdb/cmake/modules/Findzstd.cmake +0 -29
  304. package/deps/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake +0 -10
@@ -23,6 +23,7 @@
23
23
  #include "util/cast_util.h"
24
24
  #include "util/coding.h"
25
25
  #include "util/concurrent_task_limiter_impl.h"
26
+ #include "util/udt_util.h"
26
27
 
27
28
  namespace ROCKSDB_NAMESPACE {
28
29
 
@@ -112,7 +113,8 @@ bool DBImpl::ShouldRescheduleFlushRequestToRetainUDT(
112
113
  }
113
114
 
114
115
  IOStatus DBImpl::SyncClosedLogs(JobContext* job_context,
115
- VersionEdit* synced_wals) {
116
+ VersionEdit* synced_wals,
117
+ bool error_recovery_in_prog) {
116
118
  TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Start");
117
119
  InstrumentedMutexLock l(&log_write_mutex_);
118
120
  autovector<log::Writer*, 1> logs_to_sync;
@@ -138,7 +140,7 @@ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context,
138
140
  ROCKS_LOG_INFO(immutable_db_options_.info_log,
139
141
  "[JOB %d] Syncing log #%" PRIu64, job_context->job_id,
140
142
  log->get_log_number());
141
- if (error_handler_.IsRecoveryInProgress()) {
143
+ if (error_recovery_in_prog) {
142
144
  log->file()->reset_seen_error();
143
145
  }
144
146
  io_s = log->file()->Sync(immutable_db_options_.use_fsync);
@@ -147,7 +149,7 @@ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context,
147
149
  }
148
150
 
149
151
  if (immutable_db_options_.recycle_log_file_num > 0) {
150
- if (error_handler_.IsRecoveryInProgress()) {
152
+ if (error_recovery_in_prog) {
151
153
  log->file()->reset_seen_error();
152
154
  }
153
155
  io_s = log->Close();
@@ -221,9 +223,10 @@ Status DBImpl::FlushMemTableToOutputFile(
221
223
  // `snapshot_seqs` has already been computed before this function starts.
222
224
  // Recording the max memtable ID ensures that the flush job does not flush
223
225
  // a memtable without knowing such snapshot(s).
224
- uint64_t max_memtable_id = needs_to_sync_closed_wals
225
- ? cfd->imm()->GetLatestMemTableID()
226
- : std::numeric_limits<uint64_t>::max();
226
+ uint64_t max_memtable_id =
227
+ needs_to_sync_closed_wals
228
+ ? cfd->imm()->GetLatestMemTableID(false /* for_atomic_flush */)
229
+ : std::numeric_limits<uint64_t>::max();
227
230
 
228
231
  // If needs_to_sync_closed_wals is false, then the flush job will pick ALL
229
232
  // existing memtables of the column family when PickMemTable() is called
@@ -232,7 +235,7 @@ Status DBImpl::FlushMemTableToOutputFile(
232
235
  // releases and re-acquires the db mutex. In the meantime, the application
233
236
  // can still insert into the memtables and increase the db's sequence number.
234
237
  // The application can take a snapshot, hoping that the latest visible state
235
- // to this snapshto is preserved. This is hard to guarantee since db mutex
238
+ // to this snapshot is preserved. This is hard to guarantee since db mutex
236
239
  // not held. This newly-created snapshot is not included in `snapshot_seqs`
237
240
  // and the flush job is unaware of its presence. Consequently, the flush job
238
241
  // may drop certain keys when generating the L0, causing incorrect data to be
@@ -249,7 +252,7 @@ Status DBImpl::FlushMemTableToOutputFile(
249
252
  GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), stats_,
250
253
  &event_logger_, mutable_cf_options.report_bg_io_stats,
251
254
  true /* sync_output_directory */, true /* write_manifest */, thread_pri,
252
- io_tracer_, seqno_time_mapping_, db_id_, db_session_id_,
255
+ io_tracer_, seqno_to_time_mapping_, db_id_, db_session_id_,
253
256
  cfd->GetFullHistoryTsLow(), &blob_callback_);
254
257
  FileMetaData file_meta;
255
258
 
@@ -260,8 +263,10 @@ Status DBImpl::FlushMemTableToOutputFile(
260
263
  // SyncClosedLogs() may unlock and re-lock the log_write_mutex multiple
261
264
  // times.
262
265
  VersionEdit synced_wals;
266
+ bool error_recovery_in_prog = error_handler_.IsRecoveryInProgress();
263
267
  mutex_.Unlock();
264
- log_io_s = SyncClosedLogs(job_context, &synced_wals);
268
+ log_io_s =
269
+ SyncClosedLogs(job_context, &synced_wals, error_recovery_in_prog);
265
270
  mutex_.Lock();
266
271
  if (log_io_s.ok() && synced_wals.IsWalAddition()) {
267
272
  const ReadOptions read_options(Env::IOActivity::kFlush);
@@ -521,7 +526,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
521
526
  GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), stats_,
522
527
  &event_logger_, mutable_cf_options.report_bg_io_stats,
523
528
  false /* sync_output_directory */, false /* write_manifest */,
524
- thread_pri, io_tracer_, seqno_time_mapping_, db_id_, db_session_id_,
529
+ thread_pri, io_tracer_, seqno_to_time_mapping_, db_id_, db_session_id_,
525
530
  cfd->GetFullHistoryTsLow(), &blob_callback_));
526
531
  }
527
532
 
@@ -545,8 +550,10 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
545
550
  // TODO (yanqin) investigate whether we should sync the closed logs for
546
551
  // single column family case.
547
552
  VersionEdit synced_wals;
553
+ bool error_recovery_in_prog = error_handler_.IsRecoveryInProgress();
548
554
  mutex_.Unlock();
549
- log_io_s = SyncClosedLogs(job_context, &synced_wals);
555
+ log_io_s =
556
+ SyncClosedLogs(job_context, &synced_wals, error_recovery_in_prog);
550
557
  mutex_.Lock();
551
558
  if (log_io_s.ok() && synced_wals.IsWalAddition()) {
552
559
  const ReadOptions read_options(Env::IOActivity::kFlush);
@@ -1005,26 +1012,14 @@ Status DBImpl::CompactRange(const CompactRangeOptions& options,
1005
1012
  end_without_ts, "" /*trim_ts*/);
1006
1013
  }
1007
1014
 
1008
- std::string begin_str;
1009
- std::string end_str;
1010
-
1011
- // CompactRange compact all keys: [begin, end] inclusively. Add maximum
1012
- // timestamp to include all `begin` keys, and add minimal timestamp to include
1013
- // all `end` keys.
1014
- if (begin_without_ts != nullptr) {
1015
- AppendKeyWithMaxTimestamp(&begin_str, *begin_without_ts, ts_sz);
1016
- }
1017
- if (end_without_ts != nullptr) {
1018
- AppendKeyWithMinTimestamp(&end_str, *end_without_ts, ts_sz);
1019
- }
1020
- Slice begin(begin_str);
1021
- Slice end(end_str);
1015
+ std::string begin_str, end_str;
1016
+ auto [begin, end] =
1017
+ MaybeAddTimestampsToRange(begin_without_ts, end_without_ts, ts_sz,
1018
+ &begin_str, &end_str, false /*exclusive_end*/);
1022
1019
 
1023
- Slice* begin_with_ts = begin_without_ts ? &begin : nullptr;
1024
- Slice* end_with_ts = end_without_ts ? &end : nullptr;
1025
-
1026
- return CompactRangeInternal(options, column_family, begin_with_ts,
1027
- end_with_ts, "" /*trim_ts*/);
1020
+ return CompactRangeInternal(
1021
+ options, column_family, begin.has_value() ? &begin.value() : nullptr,
1022
+ end.has_value() ? &end.value() : nullptr, "" /*trim_ts*/);
1028
1023
  }
1029
1024
 
1030
1025
  Status DBImpl::IncreaseFullHistoryTsLow(ColumnFamilyHandle* column_family,
@@ -1531,7 +1526,8 @@ Status DBImpl::CompactFilesImpl(
1531
1526
  // without releasing the lock, so we're guaranteed a compaction can be formed.
1532
1527
  assert(c != nullptr);
1533
1528
 
1534
- c->SetInputVersion(version);
1529
+ c->FinalizeInputInfo(version);
1530
+
1535
1531
  // deletion compaction currently not allowed in CompactFiles.
1536
1532
  assert(!c->deletion_compaction());
1537
1533
 
@@ -1581,7 +1577,12 @@ Status DBImpl::CompactFilesImpl(
1581
1577
  TEST_SYNC_POINT("CompactFilesImpl:3");
1582
1578
  mutex_.Lock();
1583
1579
 
1584
- Status status = compaction_job.Install(*c->mutable_cf_options());
1580
+ bool compaction_released = false;
1581
+ Status status =
1582
+ compaction_job.Install(*c->mutable_cf_options(), &compaction_released);
1583
+ if (!compaction_released) {
1584
+ c->ReleaseCompactionFiles(s);
1585
+ }
1585
1586
  if (status.ok()) {
1586
1587
  assert(compaction_job.io_status().ok());
1587
1588
  InstallSuperVersionAndScheduleWork(c->column_family_data(),
@@ -1592,7 +1593,6 @@ Status DBImpl::CompactFilesImpl(
1592
1593
  // not check compaction_job.io_status() explicitly if we're not calling
1593
1594
  // SetBGError
1594
1595
  compaction_job.io_status().PermitUncheckedError();
1595
- c->ReleaseCompactionFiles(s);
1596
1596
  // Need to make sure SstFileManager does its bookkeeping
1597
1597
  auto sfm = static_cast<SstFileManagerImpl*>(
1598
1598
  immutable_db_options_.sst_file_manager.get());
@@ -2236,7 +2236,8 @@ void DBImpl::GenerateFlushRequest(const autovector<ColumnFamilyData*>& cfds,
2236
2236
  // cfd may be null, see DBImpl::ScheduleFlushes
2237
2237
  continue;
2238
2238
  }
2239
- uint64_t max_memtable_id = cfd->imm()->GetLatestMemTableID();
2239
+ uint64_t max_memtable_id = cfd->imm()->GetLatestMemTableID(
2240
+ immutable_db_options_.atomic_flush /* for_atomic_flush */);
2240
2241
  req->cfd_to_max_mem_id_to_persist.emplace(cfd, max_memtable_id);
2241
2242
  }
2242
2243
  }
@@ -2280,19 +2281,7 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
2280
2281
  }
2281
2282
  WaitForPendingWrites();
2282
2283
 
2283
- if (flush_reason != FlushReason::kErrorRecoveryRetryFlush &&
2284
- flush_reason != FlushReason::kCatchUpAfterErrorRecovery &&
2285
- (!cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load())) {
2286
- // Note that, when flush reason is kErrorRecoveryRetryFlush, during the
2287
- // auto retry resume, we want to avoid creating new small memtables.
2288
- // If flush reason is kCatchUpAfterErrorRecovery, we try to flush any new
2289
- // memtable that filled up during recovery, and we also want to avoid
2290
- // switching memtable to create small memtables.
2291
- // Therefore, SwitchMemtable will not be called. Also, since ResumeImpl
2292
- // will iterate through all the CFs and call FlushMemtable during auto
2293
- // retry resume, it is possible that in some CFs,
2294
- // cfd->imm()->NumNotFlushed() = 0. In this case, so no flush request will
2295
- // be created and scheduled, status::OK() will be returned.
2284
+ if (!cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load()) {
2296
2285
  s = SwitchMemtable(cfd, &context);
2297
2286
  }
2298
2287
  const uint64_t flush_memtable_id = std::numeric_limits<uint64_t>::max();
@@ -2301,10 +2290,10 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
2301
2290
  !cached_recoverable_state_empty_.load()) {
2302
2291
  FlushRequest req{flush_reason, {{cfd, flush_memtable_id}}};
2303
2292
  flush_reqs.emplace_back(std::move(req));
2304
- memtable_ids_to_wait.emplace_back(cfd->imm()->GetLatestMemTableID());
2293
+ memtable_ids_to_wait.emplace_back(
2294
+ cfd->imm()->GetLatestMemTableID(false /* for_atomic_flush */));
2305
2295
  }
2306
- if (immutable_db_options_.persist_stats_to_disk &&
2307
- flush_reason != FlushReason::kErrorRecoveryRetryFlush) {
2296
+ if (immutable_db_options_.persist_stats_to_disk) {
2308
2297
  ColumnFamilyData* cfd_stats =
2309
2298
  versions_->GetColumnFamilySet()->GetColumnFamily(
2310
2299
  kPersistentStatsColumnFamilyName);
@@ -2330,7 +2319,8 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
2330
2319
  FlushRequest req{flush_reason, {{cfd_stats, flush_memtable_id}}};
2331
2320
  flush_reqs.emplace_back(std::move(req));
2332
2321
  memtable_ids_to_wait.emplace_back(
2333
- cfd_stats->imm()->GetLatestMemTableID());
2322
+ cfd_stats->imm()->GetLatestMemTableID(
2323
+ false /* for_atomic_flush */));
2334
2324
  }
2335
2325
  }
2336
2326
  }
@@ -2381,8 +2371,7 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
2381
2371
  }
2382
2372
  s = WaitForFlushMemTables(
2383
2373
  cfds, flush_memtable_ids,
2384
- (flush_reason == FlushReason::kErrorRecovery ||
2385
- flush_reason == FlushReason::kErrorRecoveryRetryFlush));
2374
+ flush_reason == FlushReason::kErrorRecovery /* resuming_from_bg_err */);
2386
2375
  InstrumentedMutexLock lock_guard(&mutex_);
2387
2376
  for (auto* tmp_cfd : cfds) {
2388
2377
  tmp_cfd->UnrefAndTryDelete();
@@ -2477,9 +2466,7 @@ Status DBImpl::AtomicFlushMemTables(
2477
2466
  }
2478
2467
 
2479
2468
  for (auto cfd : cfds) {
2480
- if ((cfd->mem()->IsEmpty() && cached_recoverable_state_empty_.load()) ||
2481
- flush_reason == FlushReason::kErrorRecoveryRetryFlush ||
2482
- flush_reason == FlushReason::kCatchUpAfterErrorRecovery) {
2469
+ if (cfd->mem()->IsEmpty() && cached_recoverable_state_empty_.load()) {
2483
2470
  continue;
2484
2471
  }
2485
2472
  cfd->Ref();
@@ -2524,8 +2511,7 @@ Status DBImpl::AtomicFlushMemTables(
2524
2511
  }
2525
2512
  s = WaitForFlushMemTables(
2526
2513
  cfds, flush_memtable_ids,
2527
- (flush_reason == FlushReason::kErrorRecovery ||
2528
- flush_reason == FlushReason::kErrorRecoveryRetryFlush));
2514
+ flush_reason == FlushReason::kErrorRecovery /* resuming_from_bg_err */);
2529
2515
  InstrumentedMutexLock lock_guard(&mutex_);
2530
2516
  for (auto* cfd : cfds) {
2531
2517
  cfd->UnrefAndTryDelete();
@@ -2534,6 +2520,68 @@ Status DBImpl::AtomicFlushMemTables(
2534
2520
  return s;
2535
2521
  }
2536
2522
 
2523
+ Status DBImpl::RetryFlushesForErrorRecovery(FlushReason flush_reason,
2524
+ bool wait) {
2525
+ mutex_.AssertHeld();
2526
+ assert(flush_reason == FlushReason::kErrorRecoveryRetryFlush ||
2527
+ flush_reason == FlushReason::kCatchUpAfterErrorRecovery);
2528
+
2529
+ // Collect referenced CFDs.
2530
+ autovector<ColumnFamilyData*> cfds;
2531
+ for (ColumnFamilyData* cfd : *versions_->GetColumnFamilySet()) {
2532
+ if (!cfd->IsDropped() && cfd->initialized() &&
2533
+ cfd->imm()->NumNotFlushed() != 0) {
2534
+ cfd->Ref();
2535
+ cfd->imm()->FlushRequested();
2536
+ cfds.push_back(cfd);
2537
+ }
2538
+ }
2539
+
2540
+ // Submit flush requests for all immutable memtables needing flush.
2541
+ // `flush_memtable_ids` will be populated such that all immutable
2542
+ // memtables eligible for flush are waited on before this function
2543
+ // returns.
2544
+ autovector<uint64_t> flush_memtable_ids;
2545
+ if (immutable_db_options_.atomic_flush) {
2546
+ FlushRequest flush_req;
2547
+ GenerateFlushRequest(cfds, flush_reason, &flush_req);
2548
+ SchedulePendingFlush(flush_req);
2549
+ for (auto& iter : flush_req.cfd_to_max_mem_id_to_persist) {
2550
+ flush_memtable_ids.push_back(iter.second);
2551
+ }
2552
+ } else {
2553
+ for (auto cfd : cfds) {
2554
+ flush_memtable_ids.push_back(
2555
+ cfd->imm()->GetLatestMemTableID(false /* for_atomic_flush */));
2556
+ // Impose no bound on the highest memtable ID flushed. There is no
2557
+ // reason to do so outside of atomic flush.
2558
+ FlushRequest flush_req{
2559
+ flush_reason,
2560
+ {{cfd,
2561
+ std::numeric_limits<uint64_t>::max() /* max_mem_id_to_persist */}}};
2562
+ SchedulePendingFlush(flush_req);
2563
+ }
2564
+ }
2565
+ MaybeScheduleFlushOrCompaction();
2566
+
2567
+ Status s;
2568
+ if (wait) {
2569
+ mutex_.Unlock();
2570
+ autovector<const uint64_t*> flush_memtable_id_ptrs;
2571
+ for (auto& flush_memtable_id : flush_memtable_ids) {
2572
+ flush_memtable_id_ptrs.push_back(&flush_memtable_id);
2573
+ }
2574
+ s = WaitForFlushMemTables(cfds, flush_memtable_id_ptrs,
2575
+ true /* resuming_from_bg_err */);
2576
+ mutex_.Lock();
2577
+ }
2578
+
2579
+ for (auto* cfd : cfds) {
2580
+ cfd->UnrefAndTryDelete();
2581
+ }
2582
+ return s;
2583
+ }
2584
+
2537
2585
  // Calling FlushMemTable(), whether from DB::Flush() or from Backup Engine, can
2538
2586
  // cause write stall, for example if one memtable is being flushed already.
2539
2587
  // This method tries to avoid write stall (similar to CompactRange() behavior)
@@ -3485,8 +3533,6 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
3485
3533
 
3486
3534
  std::unique_ptr<TaskLimiterToken> task_token;
3487
3535
 
3488
- // InternalKey manual_end_storage;
3489
- // InternalKey* manual_end = &manual_end_storage;
3490
3536
  bool sfm_reserved_compact_space = false;
3491
3537
  if (is_manual) {
3492
3538
  ManualCompactionState* m = manual_compaction;
@@ -3622,6 +3668,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
3622
3668
  }
3623
3669
 
3624
3670
  IOStatus io_s;
3671
+ bool compaction_released = false;
3625
3672
  if (!c) {
3626
3673
  // Nothing to do
3627
3674
  ROCKS_LOG_BUFFER(log_buffer, "Compaction nothing to do");
@@ -3644,7 +3691,12 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
3644
3691
  }
3645
3692
  status = versions_->LogAndApply(
3646
3693
  c->column_family_data(), *c->mutable_cf_options(), read_options,
3647
- c->edit(), &mutex_, directories_.GetDbDir());
3694
+ c->edit(), &mutex_, directories_.GetDbDir(),
3695
+ /*new_descriptor_log=*/false, /*column_family_options=*/nullptr,
3696
+ [&c, &compaction_released](const Status& s) {
3697
+ c->ReleaseCompactionFiles(s);
3698
+ compaction_released = true;
3699
+ });
3648
3700
  io_s = versions_->io_status();
3649
3701
  InstallSuperVersionAndScheduleWork(c->column_family_data(),
3650
3702
  &job_context->superversion_contexts[0],
@@ -3652,6 +3704,9 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
3652
3704
  ROCKS_LOG_BUFFER(log_buffer, "[%s] Deleted %d files\n",
3653
3705
  c->column_family_data()->GetName().c_str(),
3654
3706
  c->num_input_files(0));
3707
+ if (status.ok() && io_s.ok()) {
3708
+ UpdateDeletionCompactionStats(c);
3709
+ }
3655
3710
  *made_progress = true;
3656
3711
  TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:AfterCompaction",
3657
3712
  c->column_family_data());
@@ -3710,7 +3765,12 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
3710
3765
  }
3711
3766
  status = versions_->LogAndApply(
3712
3767
  c->column_family_data(), *c->mutable_cf_options(), read_options,
3713
- c->edit(), &mutex_, directories_.GetDbDir());
3768
+ c->edit(), &mutex_, directories_.GetDbDir(),
3769
+ /*new_descriptor_log=*/false, /*column_family_options=*/nullptr,
3770
+ [&c, &compaction_released](const Status& s) {
3771
+ c->ReleaseCompactionFiles(s);
3772
+ compaction_released = true;
3773
+ });
3714
3774
  io_s = versions_->io_status();
3715
3775
  // Use latest MutableCFOptions
3716
3776
  InstallSuperVersionAndScheduleWork(c->column_family_data(),
@@ -3760,6 +3820,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
3760
3820
  // Transfer requested token, so it doesn't need to do it again.
3761
3821
  ca->prepicked_compaction->task_token = std::move(task_token);
3762
3822
  ++bg_bottom_compaction_scheduled_;
3823
+ assert(c == nullptr);
3763
3824
  env_->Schedule(&DBImpl::BGWorkBottomCompaction, ca, Env::Priority::BOTTOM,
3764
3825
  this, &DBImpl::UnscheduleCompactionCallback);
3765
3826
  } else {
@@ -3803,8 +3864,8 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
3803
3864
  compaction_job.Run().PermitUncheckedError();
3804
3865
  TEST_SYNC_POINT("DBImpl::BackgroundCompaction:NonTrivial:AfterRun");
3805
3866
  mutex_.Lock();
3806
-
3807
- status = compaction_job.Install(*c->mutable_cf_options());
3867
+ status =
3868
+ compaction_job.Install(*c->mutable_cf_options(), &compaction_released);
3808
3869
  io_s = compaction_job.io_status();
3809
3870
  if (status.ok()) {
3810
3871
  InstallSuperVersionAndScheduleWork(c->column_family_data(),
@@ -3823,7 +3884,23 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
3823
3884
  }
3824
3885
 
3825
3886
  if (c != nullptr) {
3826
- c->ReleaseCompactionFiles(status);
3887
+ if (!compaction_released) {
3888
+ c->ReleaseCompactionFiles(status);
3889
+ } else {
3890
+ #ifndef NDEBUG
3891
+ // Sanity checking that compaction files are freed.
3892
+ for (size_t i = 0; i < c->num_input_levels(); i++) {
3893
+ for (size_t j = 0; j < c->inputs(i)->size(); j++) {
3894
+ assert(!c->input(i, j)->being_compacted);
3895
+ }
3896
+ }
3897
+ std::unordered_set<Compaction*>* cip = c->column_family_data()
3898
+ ->compaction_picker()
3899
+ ->compactions_in_progress();
3900
+ assert(cip->find(c.get()) == cip->end());
3901
+ #endif
3902
+ }
3903
+
3827
3904
  *made_progress = true;
3828
3905
 
3829
3906
  // Need to make sure SstFileManager does its bookkeeping
@@ -4008,6 +4085,27 @@ bool DBImpl::MCOverlap(ManualCompactionState* m, ManualCompactionState* m1) {
4008
4085
  return false;
4009
4086
  }
4010
4087
 
4088
+ void DBImpl::UpdateDeletionCompactionStats(
4089
+ const std::unique_ptr<Compaction>& c) {
4090
+ if (c == nullptr) {
4091
+ return;
4092
+ }
4093
+
4094
+ CompactionReason reason = c->compaction_reason();
4095
+
4096
+ switch (reason) {
4097
+ case CompactionReason::kFIFOMaxSize:
4098
+ RecordTick(stats_, FIFO_MAX_SIZE_COMPACTIONS);
4099
+ break;
4100
+ case CompactionReason::kFIFOTtl:
4101
+ RecordTick(stats_, FIFO_TTL_COMPACTIONS);
4102
+ break;
4103
+ default:
4104
+ assert(false);
4105
+ break;
4106
+ }
4107
+ }
4108
+
4011
4109
  void DBImpl::BuildCompactionJobInfo(
4012
4110
  const ColumnFamilyData* cfd, Compaction* c, const Status& st,
4013
4111
  const CompactionJobStats& compaction_job_stats, const int job_id,
@@ -4021,7 +4119,12 @@ void DBImpl::BuildCompactionJobInfo(
4021
4119
  compaction_job_info->base_input_level = c->start_level();
4022
4120
  compaction_job_info->output_level = c->output_level();
4023
4121
  compaction_job_info->stats = compaction_job_stats;
4024
- compaction_job_info->table_properties = c->GetTableProperties();
4122
+ const auto& input_table_properties = c->GetInputTableProperties();
4123
+ const auto& output_table_properties = c->GetOutputTableProperties();
4124
+ compaction_job_info->table_properties.insert(input_table_properties.begin(),
4125
+ input_table_properties.end());
4126
+ compaction_job_info->table_properties.insert(output_table_properties.begin(),
4127
+ output_table_properties.end());
4025
4128
  compaction_job_info->compaction_reason = c->compaction_reason();
4026
4129
  compaction_job_info->compression = c->output_compression();
4027
4130
 
@@ -306,11 +306,12 @@ const PeriodicTaskScheduler& DBImpl::TEST_GetPeriodicTaskScheduler() const {
306
306
 
307
307
  SeqnoToTimeMapping DBImpl::TEST_GetSeqnoToTimeMapping() const {
308
308
  InstrumentedMutexLock l(&mutex_);
309
- return seqno_time_mapping_;
309
+ return seqno_to_time_mapping_;
310
310
  }
311
311
 
312
312
 
313
313
  size_t DBImpl::TEST_EstimateInMemoryStatsHistorySize() const {
314
+ InstrumentedMutexLock l(&const_cast<DBImpl*>(this)->stats_history_mutex_);
314
315
  return EstimateInMemoryStatsHistorySize();
315
316
  }
316
317
  } // namespace ROCKSDB_NAMESPACE
@@ -100,6 +100,14 @@ Status DBImpl::EnableFileDeletions(bool force) {
100
100
  return Status::OK();
101
101
  }
102
102
 
103
+ int DBImpl::EnableFileDeletionsWithLock() {
104
+ mutex_.AssertHeld();
105
+ // In case others have called EnableFileDeletions(true /* force */) in between
106
+ disable_delete_obsolete_files_ =
107
+ std::max(0, disable_delete_obsolete_files_ - 1);
108
+ return disable_delete_obsolete_files_;
109
+ }
110
+
103
111
  bool DBImpl::IsFileDeletionsEnabled() const {
104
112
  return 0 == disable_delete_obsolete_files_;
105
113
  }
@@ -457,12 +465,12 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) {
457
465
  std::sort(candidate_files.begin(), candidate_files.end(),
458
466
  [](const JobContext::CandidateFileInfo& lhs,
459
467
  const JobContext::CandidateFileInfo& rhs) {
460
- if (lhs.file_name > rhs.file_name) {
468
+ if (lhs.file_name < rhs.file_name) {
461
469
  return true;
462
- } else if (lhs.file_name < rhs.file_name) {
470
+ } else if (lhs.file_name > rhs.file_name) {
463
471
  return false;
464
472
  } else {
465
- return (lhs.file_path > rhs.file_path);
473
+ return (lhs.file_path < rhs.file_path);
466
474
  }
467
475
  });
468
476
  candidate_files.erase(
@@ -995,7 +1003,7 @@ Status DBImpl::DeleteUnreferencedSstFiles(RecoveryContext* recovery_ctx) {
995
1003
  if (type == kTableFile && number >= next_file_number &&
996
1004
  recovery_ctx->files_to_delete_.find(normalized_fpath) ==
997
1005
  recovery_ctx->files_to_delete_.end()) {
998
- recovery_ctx->files_to_delete_.emplace(normalized_fpath);
1006
+ recovery_ctx->files_to_delete_.emplace(normalized_fpath, path);
999
1007
  }
1000
1008
  }
1001
1009
  }
@@ -25,6 +25,7 @@
25
25
  #include "rocksdb/wal_filter.h"
26
26
  #include "test_util/sync_point.h"
27
27
  #include "util/rate_limiter_impl.h"
28
+ #include "util/string_util.h"
28
29
  #include "util/udt_util.h"
29
30
 
30
31
  namespace ROCKSDB_NAMESPACE {
@@ -291,6 +292,18 @@ Status DBImpl::ValidateOptions(const DBOptions& db_options) {
291
292
  "writes in direct IO require writable_file_max_buffer_size > 0");
292
293
  }
293
294
 
295
+ if (db_options.daily_offpeak_time_utc != "") {
296
+ int start_time, end_time;
297
+ if (!TryParseTimeRangeString(db_options.daily_offpeak_time_utc, start_time,
298
+ end_time)) {
299
+ return Status::InvalidArgument(
300
+ "daily_offpeak_time_utc should be set in the format HH:mm-HH:mm "
301
+ "(e.g. 04:30-07:30)");
302
+ } else if (start_time == end_time) {
303
+ return Status::InvalidArgument(
304
+ "start_time and end_time cannot be the same");
305
+ }
306
+ }
294
307
  return Status::OK();
295
308
  }
296
309
 
@@ -405,7 +418,8 @@ Status DBImpl::Recover(
405
418
  uint64_t* recovered_seq, RecoveryContext* recovery_ctx) {
406
419
  mutex_.AssertHeld();
407
420
 
408
- bool is_new_db = false;
421
+ bool tmp_is_new_db = false;
422
+ bool& is_new_db = recovery_ctx ? recovery_ctx->is_new_db_ : tmp_is_new_db;
409
423
  assert(db_lock_ == nullptr);
410
424
  std::vector<std::string> files_in_dbname;
411
425
  if (!read_only) {
@@ -858,7 +872,8 @@ Status DBImpl::PersistentStatsProcessFormatVersion() {
858
872
  if (s.ok()) {
859
873
  ColumnFamilyOptions cfo;
860
874
  OptimizeForPersistentStats(&cfo);
861
- s = CreateColumnFamily(cfo, kPersistentStatsColumnFamilyName, &handle);
875
+ s = CreateColumnFamilyImpl(cfo, kPersistentStatsColumnFamilyName,
876
+ &handle);
862
877
  }
863
878
  if (s.ok()) {
864
879
  persist_stats_cf_handle_ = static_cast<ColumnFamilyHandleImpl*>(handle);
@@ -911,7 +926,7 @@ Status DBImpl::InitPersistStatsColumnFamily() {
911
926
  ColumnFamilyHandle* handle = nullptr;
912
927
  ColumnFamilyOptions cfo;
913
928
  OptimizeForPersistentStats(&cfo);
914
- s = CreateColumnFamily(cfo, kPersistentStatsColumnFamilyName, &handle);
929
+ s = CreateColumnFamilyImpl(cfo, kPersistentStatsColumnFamilyName, &handle);
915
930
  persist_stats_cf_handle_ = static_cast<ColumnFamilyHandleImpl*>(handle);
916
931
  mutex_.Lock();
917
932
  }
@@ -927,8 +942,11 @@ Status DBImpl::LogAndApplyForRecovery(const RecoveryContext& recovery_ctx) {
927
942
  recovery_ctx.edit_lists_, &mutex_, directories_.GetDbDir());
928
943
  if (s.ok() && !(recovery_ctx.files_to_delete_.empty())) {
929
944
  mutex_.Unlock();
930
- for (const auto& fname : recovery_ctx.files_to_delete_) {
931
- s = env_->DeleteFile(fname);
945
+ for (const auto& stale_sst_file : recovery_ctx.files_to_delete_) {
946
+ s = DeleteDBFile(&immutable_db_options_, stale_sst_file.first,
947
+ stale_sst_file.second,
948
+ /*force_bg=*/false,
949
+ /*force_fg=*/false);
932
950
  if (!s.ok()) {
933
951
  break;
934
952
  }
@@ -1298,7 +1316,7 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
1298
1316
  flushed = true;
1299
1317
 
1300
1318
  cfd->CreateNewMemtable(*cfd->GetLatestMutableCFOptions(),
1301
- *next_sequence);
1319
+ *next_sequence - 1);
1302
1320
  }
1303
1321
  }
1304
1322
  }
@@ -1649,7 +1667,7 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1649
1667
  TableFileCreationReason::kRecovery, 0 /* oldest_key_time */,
1650
1668
  0 /* file_creation_time */, db_id_, db_session_id_,
1651
1669
  0 /* target_file_size */, meta.fd.GetNumber());
1652
- SeqnoToTimeMapping empty_seqno_time_mapping;
1670
+ SeqnoToTimeMapping empty_seqno_to_time_mapping;
1653
1671
  Version* version = cfd->current();
1654
1672
  version->Ref();
1655
1673
  const ReadOptions read_option(Env::IOActivity::kDBOpen);
@@ -1661,7 +1679,7 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1661
1679
  snapshot_seqs, earliest_write_conflict_snapshot, kMaxSequenceNumber,
1662
1680
  snapshot_checker, paranoid_file_checks, cfd->internal_stats(), &io_s,
1663
1681
  io_tracer_, BlobFileCreationReason::kRecovery,
1664
- empty_seqno_time_mapping, &event_logger_, job_id, Env::IO_HIGH,
1682
+ empty_seqno_to_time_mapping, &event_logger_, job_id, Env::IO_HIGH,
1665
1683
  nullptr /* table_properties */, write_hint,
1666
1684
  nullptr /*full_history_ts_low*/, &blob_callback_, version,
1667
1685
  &num_input_entries);
@@ -1715,6 +1733,22 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1715
1733
  for (const auto& blob : blob_file_additions) {
1716
1734
  edit->AddBlobFile(blob);
1717
1735
  }
1736
+
1737
+ // For UDT in memtable only feature, move up the cutoff timestamp whenever
1738
+ // a flush happens.
1739
+ const Comparator* ucmp = cfd->user_comparator();
1740
+ size_t ts_sz = ucmp->timestamp_size();
1741
+ if (ts_sz > 0 && !cfd->ioptions()->persist_user_defined_timestamps) {
1742
+ Slice mem_newest_udt = mem->GetNewestUDT();
1743
+ std::string full_history_ts_low = cfd->GetFullHistoryTsLow();
1744
+ if (full_history_ts_low.empty() ||
1745
+ ucmp->CompareTimestamp(mem_newest_udt, full_history_ts_low) >= 0) {
1746
+ std::string new_full_history_ts_low;
1747
+ GetFullHistoryTsLowFromU64CutoffTs(&mem_newest_udt,
1748
+ &new_full_history_ts_low);
1749
+ edit->SetFullHistoryTsLow(new_full_history_ts_low);
1750
+ }
1751
+ }
1718
1752
  }
1719
1753
 
1720
1754
  InternalStats::CompactionStats stats(CompactionReason::kFlush, 1);
@@ -1959,6 +1993,7 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
1959
1993
 
1960
1994
  impl->wal_in_db_path_ = impl->immutable_db_options_.IsWalDirSameAsDBPath();
1961
1995
  RecoveryContext recovery_ctx;
1996
+ impl->options_mutex_.Lock();
1962
1997
  impl->mutex_.Lock();
1963
1998
 
1964
1999
  // Handles create_if_missing, error_if_exists
@@ -2040,7 +2075,9 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
2040
2075
  // missing column family, create it
2041
2076
  ColumnFamilyHandle* handle = nullptr;
2042
2077
  impl->mutex_.Unlock();
2043
- s = impl->CreateColumnFamily(cf.options, cf.name, &handle);
2078
+ // NOTE: the work normally done in WrapUpCreateColumnFamilies will
2079
+ // be done separately below.
2080
+ s = impl->CreateColumnFamilyImpl(cf.options, cf.name, &handle);
2044
2081
  impl->mutex_.Lock();
2045
2082
  if (s.ok()) {
2046
2083
  handles->push_back(handle);
@@ -2091,9 +2128,8 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
2091
2128
  if (s.ok()) {
2092
2129
  // Persist RocksDB Options before scheduling the compaction.
2093
2130
  // The WriteOptionsFile() will release and lock the mutex internally.
2094
- persist_options_status = impl->WriteOptionsFile(
2095
- false /*need_mutex_lock*/, false /*need_enter_write_thread*/);
2096
-
2131
+ persist_options_status =
2132
+ impl->WriteOptionsFile(true /*db_mutex_already_held*/);
2097
2133
  *dbptr = impl;
2098
2134
  impl->opened_successfully_ = true;
2099
2135
  impl->DeleteObsoleteFiles();
@@ -2214,10 +2250,10 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
2214
2250
  if (s.ok()) {
2215
2251
  s = impl->StartPeriodicTaskScheduler();
2216
2252
  }
2217
-
2218
2253
  if (s.ok()) {
2219
- s = impl->RegisterRecordSeqnoTimeWorker();
2254
+ s = impl->RegisterRecordSeqnoTimeWorker(recovery_ctx.is_new_db_);
2220
2255
  }
2256
+ impl->options_mutex_.Unlock();
2221
2257
  if (!s.ok()) {
2222
2258
  for (auto* h : *handles) {
2223
2259
  delete h;