@nxtedition/rocksdb 10.1.4 → 10.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. package/binding.cc +16 -12
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +16 -5
  3. package/deps/rocksdb/rocksdb/Makefile +38 -15
  4. package/deps/rocksdb/rocksdb/TARGETS +10 -0
  5. package/deps/rocksdb/rocksdb/cache/cache_test.cc +58 -0
  6. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +4 -4
  7. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +4 -2
  8. package/deps/rocksdb/rocksdb/db/builder.cc +2 -2
  9. package/deps/rocksdb/rocksdb/db/builder.h +1 -1
  10. package/deps/rocksdb/rocksdb/db/c.cc +205 -6
  11. package/deps/rocksdb/rocksdb/db/c_test.c +189 -1
  12. package/deps/rocksdb/rocksdb/db/column_family.cc +28 -0
  13. package/deps/rocksdb/rocksdb/db/column_family.h +17 -0
  14. package/deps/rocksdb/rocksdb/db/column_family_test.cc +234 -60
  15. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +8 -1
  16. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +11 -9
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +4 -4
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -0
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +1 -0
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +22 -25
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +2 -0
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +112 -0
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +72 -21
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +2 -0
  25. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +77 -0
  26. package/deps/rocksdb/rocksdb/db/convenience.cc +3 -0
  27. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +269 -112
  28. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +107 -43
  29. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +93 -24
  30. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +5 -5
  31. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +157 -68
  32. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +56 -15
  33. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +78 -105
  34. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +39 -9
  35. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
  36. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +21 -14
  37. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +107 -63
  38. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +43 -2
  39. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +4 -0
  40. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +6 -0
  41. package/deps/rocksdb/rocksdb/db/db_test.cc +10 -2
  42. package/deps/rocksdb/rocksdb/db/db_test2.cc +1 -1
  43. package/deps/rocksdb/rocksdb/db/db_test_util.cc +5 -0
  44. package/deps/rocksdb/rocksdb/db/db_test_util.h +7 -6
  45. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +92 -2
  46. package/deps/rocksdb/rocksdb/db/error_handler.cc +34 -39
  47. package/deps/rocksdb/rocksdb/db/error_handler.h +3 -4
  48. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +8 -4
  49. package/deps/rocksdb/rocksdb/db/event_helpers.cc +6 -3
  50. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +71 -15
  51. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +11 -0
  52. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +383 -4
  53. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +88 -72
  54. package/deps/rocksdb/rocksdb/db/flush_job.cc +30 -3
  55. package/deps/rocksdb/rocksdb/db/flush_job.h +14 -0
  56. package/deps/rocksdb/rocksdb/db/internal_stats.cc +60 -1
  57. package/deps/rocksdb/rocksdb/db/internal_stats.h +20 -1
  58. package/deps/rocksdb/rocksdb/db/log_writer.cc +24 -0
  59. package/deps/rocksdb/rocksdb/db/log_writer.h +5 -0
  60. package/deps/rocksdb/rocksdb/db/memtable.cc +6 -4
  61. package/deps/rocksdb/rocksdb/db/memtable.h +10 -10
  62. package/deps/rocksdb/rocksdb/db/memtable_list.cc +4 -4
  63. package/deps/rocksdb/rocksdb/db/multi_cf_iterator_impl.h +10 -3
  64. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +8 -10
  65. package/deps/rocksdb/rocksdb/db/repair.cc +4 -3
  66. package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.cc +30 -0
  67. package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.h +9 -0
  68. package/deps/rocksdb/rocksdb/db/table_cache.cc +17 -2
  69. package/deps/rocksdb/rocksdb/db/table_cache.h +9 -1
  70. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +9 -2
  71. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +3 -1
  72. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +3 -3
  73. package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +7 -7
  74. package/deps/rocksdb/rocksdb/db/version_edit.cc +0 -1
  75. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -6
  76. package/deps/rocksdb/rocksdb/db/version_set.cc +54 -31
  77. package/deps/rocksdb/rocksdb/db/version_set.h +14 -7
  78. package/deps/rocksdb/rocksdb/db/wal_manager.cc +37 -29
  79. package/deps/rocksdb/rocksdb/db/wal_manager.h +6 -5
  80. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +6 -0
  81. package/deps/rocksdb/rocksdb/db/write_batch.cc +54 -23
  82. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +46 -5
  83. package/deps/rocksdb/rocksdb/db/write_thread.cc +53 -5
  84. package/deps/rocksdb/rocksdb/db/write_thread.h +36 -4
  85. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
  86. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +5 -0
  87. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +57 -17
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +11 -3
  89. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +8 -4
  90. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +10 -25
  91. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +25 -88
  92. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_filters.cc +93 -0
  93. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_filters.h +16 -0
  94. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +43 -0
  95. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +109 -21
  96. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +8 -0
  97. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +666 -205
  98. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +55 -10
  99. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +18 -16
  100. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +19 -0
  101. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +5 -0
  102. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +782 -494
  103. package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +21 -0
  104. package/deps/rocksdb/rocksdb/env/env.cc +6 -0
  105. package/deps/rocksdb/rocksdb/env/io_posix.cc +0 -1
  106. package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
  107. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +34 -19
  108. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +29 -32
  109. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +41 -15
  110. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +4 -2
  111. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +63 -0
  112. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +16 -5
  113. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +5 -0
  114. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +0 -16
  115. package/deps/rocksdb/rocksdb/include/rocksdb/iterator_base.h +16 -0
  116. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +21 -0
  117. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +76 -3
  118. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +17 -0
  119. package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +12 -6
  120. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +31 -0
  121. package/deps/rocksdb/rocksdb/include/rocksdb/user_write_callback.h +29 -0
  122. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +4 -2
  123. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/customizable_util.h +0 -1
  124. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +17 -8
  125. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +2 -2
  126. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +46 -0
  127. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +7 -0
  128. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  129. package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -2
  130. package/deps/rocksdb/rocksdb/options/cf_options.h +6 -2
  131. package/deps/rocksdb/rocksdb/options/db_options.cc +8 -0
  132. package/deps/rocksdb/rocksdb/options/db_options.h +9 -5
  133. package/deps/rocksdb/rocksdb/options/options.cc +3 -0
  134. package/deps/rocksdb/rocksdb/options/options_helper.cc +1 -0
  135. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +3 -1
  136. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +2 -2
  137. package/deps/rocksdb/rocksdb/port/stack_trace.cc +1 -0
  138. package/deps/rocksdb/rocksdb/port/win/port_win.cc +3 -2
  139. package/deps/rocksdb/rocksdb/src.mk +4 -0
  140. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +1 -2
  141. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +4 -2
  142. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +15 -0
  143. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +102 -41
  144. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +15 -7
  145. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -3
  146. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -6
  147. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +31 -0
  148. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +6 -0
  149. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +10 -5
  150. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +11 -15
  151. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +17 -11
  152. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +5 -2
  153. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -21
  154. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +9 -11
  155. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +16 -16
  156. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -2
  157. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +14 -9
  158. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +4 -1
  159. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +82 -41
  160. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +13 -14
  161. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +18 -22
  162. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +51 -13
  163. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +2 -0
  164. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +3 -11
  165. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +2 -3
  166. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +9 -10
  167. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +3 -2
  168. package/deps/rocksdb/rocksdb/table/format.cc +1 -2
  169. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +18 -13
  170. package/deps/rocksdb/rocksdb/table/merging_iterator.h +5 -3
  171. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +2 -2
  172. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +1 -1
  173. package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +3 -1
  174. package/deps/rocksdb/rocksdb/table/table_builder.h +8 -7
  175. package/deps/rocksdb/rocksdb/table/table_reader.h +9 -0
  176. package/deps/rocksdb/rocksdb/test_util/testutil.cc +1 -0
  177. package/deps/rocksdb/rocksdb/test_util/testutil.h +6 -0
  178. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +19 -0
  179. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +434 -110
  180. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +3 -1
  181. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +3 -0
  182. package/deps/rocksdb/rocksdb/util/aligned_storage.h +24 -0
  183. package/deps/rocksdb/rocksdb/util/filter_bench.cc +1 -1
  184. package/deps/rocksdb/rocksdb/util/random.cc +2 -1
  185. package/deps/rocksdb/rocksdb/util/stderr_logger.h +1 -1
  186. package/deps/rocksdb/rocksdb/util/udt_util.cc +33 -0
  187. package/deps/rocksdb/rocksdb/util/udt_util.h +7 -0
  188. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +33 -0
  189. package/deps/rocksdb/rocksdb/util/write_batch_util.h +5 -0
  190. package/deps/rocksdb/rocksdb/util/xxhash.h +10 -3
  191. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +13 -13
  192. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +104 -48
  193. package/deps/rocksdb/rocksdb/utilities/debug.cc +16 -4
  194. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +647 -235
  195. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -157
  196. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +144 -0
  197. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +45 -0
  198. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector_test.cc +139 -0
  199. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +12 -0
  200. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc +3 -0
  201. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +105 -6
  202. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +64 -8
  203. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +5 -0
  204. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +43 -5
  205. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +5 -0
  206. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +154 -6
  207. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +1 -1
  208. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +158 -2
  209. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +16 -11
  210. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +4 -4
  211. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +9 -8
  212. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +2 -1
  213. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +43 -7
  214. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +2 -0
  215. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +1 -1
  216. package/index.js +1 -2
  217. package/package.json +1 -1
  218. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  219. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  220. package/util.h +25 -2
  221. package/.tap/test-results/node_modules/abstract-level/test/chained-batch-test.js.tap +0 -0
  222. package/.tap/test-results/node_modules/abstract-level/test/get-test.js.tap +0 -0
  223. package/.tap/test-results/test/abstract-level-test.js.tap +0 -1077
  224. package/.tap/test-results/test/batch-test.js.tap +0 -12
  225. package/.tap/test-results/test/chained-batch-gc-test.js.tap +0 -11
  226. package/.tap/test-results/test/cleanup-hanging-iterators-test.js.tap +0 -135
  227. package/.tap/test-results/test/clear-gc-test.js.tap +0 -13
  228. package/.tap/test-results/test/column-test.js.tap +0 -55
  229. package/.tap/test-results/test/common.js.tap +0 -0
  230. package/.tap/test-results/test/compression-test.js.tap +0 -30
  231. package/.tap/test-results/test/db-identity.js.tap +0 -12
  232. package/.tap/test-results/test/electron.js.tap +0 -0
  233. package/.tap/test-results/test/env-cleanup-hook-test.js.tap +0 -40
  234. package/.tap/test-results/test/env-cleanup-hook.js.tap +0 -0
  235. package/.tap/test-results/test/gc.js.tap +0 -0
  236. package/.tap/test-results/test/getproperty-test.js.tap +0 -29
  237. package/.tap/test-results/test/iterator-gc-test.js.tap +0 -15
  238. package/.tap/test-results/test/iterator-hwm-test.js.tap +0 -131
  239. package/.tap/test-results/test/iterator-recursion-test.js.tap +0 -12
  240. package/.tap/test-results/test/iterator-starvation-test.js.tap +0 -73
  241. package/.tap/test-results/test/iterator-test.js.tap +0 -6
  242. package/.tap/test-results/test/leak-tester-batch.js.tap +0 -0
  243. package/.tap/test-results/test/leak-tester-iterator.js.tap +0 -0
  244. package/.tap/test-results/test/leak-tester.js.tap +0 -0
  245. package/.tap/test-results/test/lock-test.js.tap +0 -18
  246. package/.tap/test-results/test/lock.js.tap +0 -0
  247. package/.tap/test-results/test/make.js.tap +0 -0
  248. package/.tap/test-results/test/max-rev-merge.js.tap +0 -0
  249. package/.tap/test-results/test/merge-operator-test.js.tap +0 -12
  250. package/.tap/test-results/test/mkdir-test.js.tap +0 -15
  251. package/.tap/test-results/test/segfault-test.js.tap +0 -76
  252. package/.tap/test-results/test/stack-blower.js.tap +0 -0
  253. package/deps/rocksdb/rocksdb/README.md +0 -29
  254. package/deps/rocksdb/rocksdb/microbench/README.md +0 -60
  255. package/deps/rocksdb/rocksdb/plugin/README.md +0 -43
  256. package/deps/rocksdb/rocksdb/port/README +0 -10
  257. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +0 -13
  258. package/tmp/000099.sst +0 -0
  259. package/tmp/000102.sst +0 -0
  260. package/tmp/000103.log +0 -0
  261. package/tmp/CURRENT +0 -1
  262. package/tmp/IDENTITY +0 -1
  263. package/tmp/LOCK +0 -0
  264. package/tmp/MANIFEST-000104 +0 -0
  265. package/tmp/OPTIONS-000098 +0 -207
  266. package/tmp/OPTIONS-000106 +0 -207
@@ -28,6 +28,7 @@
28
28
  #include "rocksdb/thread_status.h"
29
29
  #include "rocksdb/transaction_log.h"
30
30
  #include "rocksdb/types.h"
31
+ #include "rocksdb/user_write_callback.h"
31
32
  #include "rocksdb/version.h"
32
33
  #include "rocksdb/wide_columns.h"
33
34
 
@@ -583,6 +584,15 @@ class DB {
583
584
  // Note: consider setting options.sync = true.
584
585
  virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0;
585
586
 
587
+ // Same as DB::Write, and takes a `UserWriteCallback` argument to allow
588
+ // users to plug in custom logic in callback functions during the write.
589
+ virtual Status WriteWithCallback(const WriteOptions& /*options*/,
590
+ WriteBatch* /*updates*/,
591
+ UserWriteCallback* /*user_write_cb*/) {
592
+ return Status::NotSupported(
593
+ "WriteWithCallback not implemented for this interface.");
594
+ }
595
+
586
596
  // If the column family specified by "column_family" contains an entry for
587
597
  // "key", return the corresponding value in "*value". If the entry is a plain
588
598
  // key-value, return the value as-is; if it is a wide-column entity, return
@@ -1324,9 +1334,10 @@ class DB {
1324
1334
 
1325
1335
  // DB implementations export properties about their state via this method.
1326
1336
  // If "property" is a valid "string" property understood by this DB
1327
- // implementation (see Properties struct above for valid options), fills
1328
- // "*value" with its current value and returns true. Otherwise, returns
1329
- // false.
1337
+ // implementation (see Properties struct above for valid options) and the DB
1338
+ // is able to get and fill "*value" with its current value, then return true.
1339
+ // In all the other cases (e.g., "property" is an invalid "string" property, IO
1340
+ // errors, etc.), it returns false.
1330
1341
  virtual bool GetProperty(ColumnFamilyHandle* column_family,
1331
1342
  const Slice& property, std::string* value) = 0;
1332
1343
  virtual bool GetProperty(const Slice& property, std::string* value) {
@@ -1831,7 +1842,7 @@ class DB {
1831
1842
  bool flush_memtable = true) = 0;
1832
1843
 
1833
1844
  // Retrieve the sorted list of all wal files with earliest file first
1834
- virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
1845
+ virtual Status GetSortedWalFiles(VectorWalPtr& files) = 0;
1835
1846
 
1836
1847
  // Retrieve information about the current wal file
1837
1848
  //
@@ -1841,7 +1852,7 @@ class DB {
1841
1852
  // Additionally, for the sake of optimization current_log_file->StartSequence
1842
1853
  // would always be set to 0
1843
1854
  virtual Status GetCurrentWalFile(
1844
- std::unique_ptr<LogFile>* current_log_file) = 0;
1855
+ std::unique_ptr<WalFile>* current_log_file) = 0;
1845
1856
 
1846
1857
  // IngestExternalFile() will load a list of external SST files (1) into the DB
1847
1858
  // Two primary modes are supported:
@@ -145,6 +145,11 @@ struct EnvOptions {
145
145
  // Exceptions MUST NOT propagate out of overridden functions into RocksDB,
146
146
  // because RocksDB is not exception-safe. This could cause undefined behavior
147
147
  // including data loss, unreported corruption, deadlocks, and more.
148
+ // An interface that abstracts RocksDB's interactions with the operating system
149
+ // environment. There are three main types of APIs:
150
+ // 1) File system operations, like creating a file, writing to a file, etc.
151
+ // 2) Thread management.
152
+ // 3) Misc functions, like getting the current time.
148
153
  class Env : public Customizable {
149
154
  public:
150
155
  static const char* kDefaultName() { return "DefaultEnv"; }
@@ -55,22 +55,6 @@ class Iterator : public IteratorBase {
55
55
  return kNoWideColumns;
56
56
  }
57
57
 
58
- // If supported, the DB state that the iterator reads from is updated to
59
- // the latest state. The iterator will be invalidated after the call.
60
- // Regardless of whether the iterator was created/refreshed previously
61
- // with or without a snapshot, the iterator will be reading the
62
- // latest DB state after this call.
63
- // Note that you will need to call a Seek*() function to get the iterator
64
- // back into a valid state before calling a function that assumes the
65
- // state is already valid, like Next().
66
- virtual Status Refresh() { return Refresh(nullptr); }
67
-
68
- // Similar to Refresh() but the iterator will be reading the latest DB state
69
- // under the given snapshot.
70
- virtual Status Refresh(const class Snapshot*) {
71
- return Status::NotSupported("Refresh() is not supported");
72
- }
73
-
74
58
  // Property "rocksdb.iterator.is-key-pinned":
75
59
  // If returning "1", this means that the Slice returned by key() is valid
76
60
  // as long as the iterator is not deleted.
@@ -58,6 +58,22 @@ class IteratorBase : public Cleanable {
58
58
  // REQUIRES: Valid()
59
59
  virtual void Prev() = 0;
60
60
 
61
+ // If supported, the DB state that the iterator reads from is updated to
62
+ // the latest state. The iterator will be invalidated after the call.
63
+ // Regardless of whether the iterator was created/refreshed previously
64
+ // with or without a snapshot, the iterator will be reading the
65
+ // latest DB state after this call.
66
+ // Note that you will need to call a Seek*() function to get the iterator
67
+ // back into a valid state before calling a function that assumes the
68
+ // state is already valid, like Next().
69
+ virtual Status Refresh() { return Refresh(nullptr); }
70
+
71
+ // Similar to Refresh() but the iterator will be reading the latest DB state
72
+ // under the given snapshot.
73
+ virtual Status Refresh(const class Snapshot*) {
74
+ return Status::NotSupported("Refresh() is not supported");
75
+ }
76
+
61
77
  // Return the key for the current entry. The underlying storage for
62
78
  // the returned slice is valid only until the next modification of the
63
79
  // iterator (i.e. the next SeekToFirst/SeekToLast/Seek/SeekForPrev/Next/Prev
@@ -328,6 +328,15 @@ struct BlobFileGarbageInfo : public BlobFileInfo {
328
328
  uint64_t garbage_blob_bytes;
329
329
  };
330
330
 
331
+ struct ManualFlushInfo {
332
+ // the id of the column family
333
+ uint32_t cf_id;
334
+ // the name of the column family
335
+ std::string cf_name;
336
+ // Reason that triggered this manual flush
337
+ FlushReason flush_reason;
338
+ };
339
+
331
340
  struct FlushJobInfo {
332
341
  // the id of the column family
333
342
  uint32_t cf_id;
@@ -492,6 +501,10 @@ struct MemTableInfo {
492
501
  uint64_t num_entries;
493
502
  // Total number of deletes in memtable
494
503
  uint64_t num_deletes;
504
+
505
+ // The newest user-defined timestamps in the memtable. Note this field is
506
+ // only populated when `persist_user_defined_timestamps` is false.
507
+ std::string newest_udt;
495
508
  };
496
509
 
497
510
  struct ExternalFileIngestionInfo {
@@ -595,6 +608,14 @@ class EventListener : public Customizable {
595
608
  virtual void OnFlushBegin(DB* /*db*/,
596
609
  const FlushJobInfo& /*flush_job_info*/) {}
597
610
 
611
+ // A callback function to RocksDB which will be called after a manual flush
612
+ // is scheduled. The default implementation is no-op.
613
+ // The size of the `manual_flush_info` vector should only be bigger than 1 if
614
+ // the DB enables atomic flush and has more than 1 column family. Its size
615
+ // should be 1 in all other cases.
616
+ virtual void OnManualFlushScheduled(
617
+ DB* /*db*/, const std::vector<ManualFlushInfo>& /*manual_flush_info*/) {}
618
+
598
619
  // A callback function for RocksDB which will be called whenever
599
620
  // a SST file is deleted. Different from OnCompactionCompleted and
600
621
  // OnFlushCompleted, this callback is designed for external logging
@@ -234,6 +234,12 @@ struct ColumnFamilyOptions : public AdvancedColumnFamilyOptions {
234
234
  // Number of files to trigger level-0 compaction. A value <0 means that
235
235
  // level-0 compaction will not be triggered by number of files at all.
236
236
  //
237
+ // Universal compaction: RocksDB will try to keep the number of sorted runs
238
+ // no more than this number. If CompactionOptionsUniversal::max_read_amp is
239
+ // set, then this option will be used only as a trigger to look for
240
+ // compaction. CompactionOptionsUniversal::max_read_amp will be the limit
241
+ // on the number of sorted runs.
242
+ //
237
243
  // Default: 4
238
244
  //
239
245
  // Dynamically changeable through SetOptions() API
@@ -344,6 +350,48 @@ struct ColumnFamilyOptions : public AdvancedColumnFamilyOptions {
344
350
  // Dynamically changeable through SetOptions() API
345
351
  uint32_t memtable_max_range_deletions = 0;
346
352
 
353
+ // EXPERIMENTAL
354
+ // When > 0, RocksDB attempts to erase some block cache entries for files
355
+ // that have become obsolete, which means they are about to be deleted.
356
+ // To avoid excessive tracking, this "uncaching" process is iterative and
357
+ // speculative, meaning it could incur extra background CPU effort if the
358
+ // file's blocks are generally not cached. A larger number indicates more
359
+ // willingness to spend CPU time to maximize block cache hit rates by
360
+ // erasing known-obsolete entries.
361
+ //
362
+ // When uncache_aggressiveness=1, block cache entries for an obsolete file
363
+ // are only erased until any attempted erase operation fails because the
364
+ // block is not cached. Then no further attempts are made to erase cached
365
+ // blocks for that file.
366
+ //
367
+ // For larger values, erasure is attempted until evidence indicates that the
368
+ // chance of success is < 0.99^(a-1), where a = uncache_aggressiveness. For
369
+ // example:
370
+ // 2 -> Attempt only while expecting >= 99% successful/useful erasure
371
+ // 11 -> 90%
372
+ // 69 -> 50%
373
+ // 110 -> 33%
374
+ // 230 -> 10%
375
+ // 460 -> 1%
376
+ // 690 -> 0.1%
377
+ // 1000 -> 1 in 23000
378
+ // 10000 -> Always (for all practical purposes)
379
+ // NOTE: UINT32_MAX and nearby values could take additional special meanings
380
+ // in the future.
381
+ //
382
+ // Pinned cache entries (guaranteed present) are always erased if
383
+ // uncache_aggressiveness > 0, but are not used in predicting the chances of
384
+ // successful erasure of non-pinned entries.
385
+ //
386
+ // NOTE: In the case of copied DBs (such as Checkpoints) sharing a block
387
+ // cache, it is possible that a file becoming obsolete doesn't mean its
388
+ // block cache entries (shared among copies) are obsolete. Such a scenario
389
+ // is the best case for uncache_aggressiveness = 0.
390
+ //
391
+ // Once validated in production, the default will likely change to something
392
+ // around 300.
393
+ uint32_t uncache_aggressiveness = 0;
394
+
347
395
  // Create ColumnFamilyOptions with default values for all fields
348
396
  ColumnFamilyOptions();
349
397
  // Create ColumnFamilyOptions from Options
@@ -597,9 +645,7 @@ struct DBOptions {
597
645
  bool verify_sst_unique_id_in_manifest = true;
598
646
 
599
647
  // Use the specified object to interact with the environment,
600
- // e.g. to read/write files, schedule background work, etc. In the near
601
- // future, support for doing storage operations such as read/write files
602
- // through env will be deprecated in favor of file_system (see below)
648
+ // e.g. to read/write files, schedule background work, etc.
603
649
  // Default: Env::Default()
604
650
  Env* env = Env::Default();
605
651
 
@@ -1299,6 +1345,15 @@ struct DBOptions {
1299
1345
  // the WAL is read.
1300
1346
  CompressionType wal_compression = kNoCompression;
1301
1347
 
1348
+ // Set to true to re-instate an old behavior of keeping complete, synced WAL
1349
+ // files open for write until they are collected for deletion by a
1350
+ // background thread. This should not be needed unless there is a
1351
+ // performance issue with file Close(), but setting it to true means that
1352
+ // Checkpoint might call LinkFile on a WAL still open for write, which might
1353
+ // be unsupported on some FileSystem implementations. As this is intended as
1354
+ // a temporary kill switch, it is already DEPRECATED.
1355
+ bool background_close_inactive_wals = false;
1356
+
1302
1357
  // If true, RocksDB supports flushing multiple column families and committing
1303
1358
  // their results atomically to MANIFEST. Note that it is not
1304
1359
  // necessary to set atomic_flush to true if WAL is always enabled since WAL
@@ -2124,6 +2179,24 @@ struct IngestExternalFileOptions {
2124
2179
  //
2125
2180
  // XXX: "bottommost" is obsolete/confusing terminology to refer to last level
2126
2181
  bool fail_if_not_bottommost_level = false;
2182
+ // EXPERIMENTAL
2183
+ // If set to true, ingestion will
2184
+ // - allow the files to not be generated by SstFileWriter, and
2185
+ // - ignore cf_id mismatch between cf_id in the files and the CF they are
2186
+ // being ingested into.
2187
+ //
2188
+ // REQUIRES:
2189
+ // - files to be ingested do not overlap with existing keys.
2190
+ // - write_global_seqno = false
2191
+ // - move_files = false
2192
+ //
2193
+ // Warning: This ONLY works for SST files where all keys have sequence number
2194
+ // zero and with no duplicated user keys (this should be guaranteed if the
2195
+ // file is generated by a DB with zero as the largest sequence number).
2196
+ // We scan the entire SST files to validate sequence numbers.
2197
+ // Warning: If a DB contains ingested files generated by another DB/CF,
2198
+ // RepairDB() may not correctly recover these files. It may lose these files.
2199
+ bool allow_db_generated_files = false;
2127
2200
  };
2128
2201
 
2129
2202
  enum TraceFilterType : uint64_t {
@@ -157,8 +157,25 @@ class TablePropertiesCollectorFactory : public Customizable {
157
157
  // The level at creating the SST file (i.e, table), of which the
158
158
  // properties are being collected.
159
159
  int level_at_creation = kUnknownLevelAtCreation;
160
+ int num_levels = kUnknownNumLevels;
161
+ // In the tiering case, data with seqnos smaller than or equal to this
162
+ // cutoff sequence number will be considered by a compaction job as eligible
163
+ // to be placed on the last level. When this is the maximum sequence number,
164
+ // it indicates tiering is disabled.
165
+ SequenceNumber last_level_inclusive_max_seqno_threshold;
160
166
  static const uint32_t kUnknownColumnFamily;
161
167
  static const int kUnknownLevelAtCreation = -1;
168
+ static const int kUnknownNumLevels = -1;
169
+
170
+ Context() {}
171
+
172
+ Context(uint32_t _column_family_id, int _level_at_creation, int _num_levels,
173
+ SequenceNumber _last_level_inclusive_max_seqno_threshold)
174
+ : column_family_id(_column_family_id),
175
+ level_at_creation(_level_at_creation),
176
+ num_levels(_num_levels),
177
+ last_level_inclusive_max_seqno_threshold(
178
+ _last_level_inclusive_max_seqno_threshold) {}
162
179
  };
163
180
 
164
181
  ~TablePropertiesCollectorFactory() override {}
@@ -14,8 +14,10 @@
14
14
 
15
15
  namespace ROCKSDB_NAMESPACE {
16
16
 
17
- class LogFile;
18
- using VectorLogPtr = std::vector<std::unique_ptr<LogFile>>;
17
+ class WalFile;
18
+ using VectorWalPtr = std::vector<std::unique_ptr<WalFile>>;
19
+ // DEPRECATED old name
20
+ using VectorLogPtr = VectorWalPtr;
19
21
 
20
22
  enum WalFileType {
21
23
  /* Indicates that WAL file is in archive directory. WAL files are moved from
@@ -30,10 +32,10 @@ enum WalFileType {
30
32
  kAliveLogFile = 1
31
33
  };
32
34
 
33
- class LogFile {
35
+ class WalFile {
34
36
  public:
35
- LogFile() {}
36
- virtual ~LogFile() {}
37
+ WalFile() {}
38
+ virtual ~WalFile() {}
37
39
 
38
40
  // Returns log file's pathname relative to the main db dir
39
41
  // Eg. For a live-log-file = /000003.log
@@ -50,10 +52,14 @@ class LogFile {
50
52
  // Starting sequence number of writebatch written in this log file
51
53
  virtual SequenceNumber StartSequence() const = 0;
52
54
 
53
- // Size of log file on disk in Bytes
55
+ // The position of the last flushed write to the file (which for
56
+ // recycled WAL files is typically less than the full file size).
54
57
  virtual uint64_t SizeFileBytes() const = 0;
55
58
  };
56
59
 
60
+ // DEPRECATED old name for WalFile. (Confusing with "Logger" etc.)
61
+ using LogFile = WalFile;
62
+
57
63
  struct BatchResult {
58
64
  SequenceNumber sequence = 0;
59
65
  std::unique_ptr<WriteBatch> writeBatchPtr;
@@ -65,6 +65,36 @@ class CompactionOptionsUniversal {
65
65
  // Default: -1
66
66
  int compression_size_percent;
67
67
 
68
+ // The limit on the number of sorted runs. RocksDB will try to keep
69
+ // the number of sorted runs at most this number. While compactions are
70
+ // running, the number of sorted runs may be temporarily higher than
71
+ // this number.
72
+ //
73
+ // Since universal compaction checks if there is compaction to do when
74
+ // the number of sorted runs is at least level0_file_num_compaction_trigger,
75
+ // it is suggested to set level0_file_num_compaction_trigger to be no larger
76
+ // than max_read_amp.
77
+ //
78
+ // Values:
79
+ // -1: special flag to let RocksDB pick default. Currently,
80
+ // RocksDB will fall back to the behavior before this option is introduced,
81
+ // which is to use level0_file_num_compaction_trigger as the limit.
82
+ // This may change in the future to behave as 0 below.
83
+ // 0: Let RocksDB auto-tune. Currently, we determine the max number of
84
+ // sorted runs based on the current DB size, size_ratio and
85
+ // write_buffer_size. Note that this is only supported for the default
86
+ // stop_style kCompactionStopStyleTotalSize. For
87
+ // kCompactionStopStyleSimilarSize, this behaves as if -1 is configured.
88
+ // N > 0: limit the number of sorted runs to be at most N.
89
+ // N should be at least the compaction trigger specified by
90
+ // level0_file_num_compaction_trigger. If 0 < max_read_amp <
91
+ // level0_file_num_compaction_trigger, Status::NotSupported() will be
92
+ // returned during DB open.
93
+ // N < -1: Status::NotSupported() will be returned during DB open.
94
+ //
95
+ // Default: -1
96
+ int max_read_amp;
97
+
68
98
  // The algorithm used to stop picking files into a single compaction run
69
99
  // Default: kCompactionStopStyleTotalSize
70
100
  CompactionStopStyle stop_style;
@@ -88,6 +118,7 @@ class CompactionOptionsUniversal {
88
118
  max_merge_width(UINT_MAX),
89
119
  max_size_amplification_percent(200),
90
120
  compression_size_percent(-1),
121
+ max_read_amp(-1),
91
122
  stop_style(kCompactionStopStyleTotalSize),
92
123
  allow_trivial_move(false),
93
124
  incremental(false) {}
@@ -0,0 +1,29 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ //
3
+ // This source code is licensed under both the GPLv2 (found in the
4
+ // COPYING file in the root directory) and Apache 2.0 License
5
+ // (found in the LICENSE.Apache file in the root directory).
6
+
7
+ #pragma once
8
+
9
+ #include "rocksdb/status.h"
10
+
11
+ namespace ROCKSDB_NAMESPACE {
12
+
13
+ // Custom callback functions to support users to plug in logic while data is
14
+ // being written to the DB. It's intended for better synchronization between
15
+ // concurrent writes. Note that these callbacks are in the write's critical path.
16
+ // It's desirable to keep them fast and minimal to not affect the write's
17
+ // latency. These callbacks may be called in the context of a different thread.
18
+ class UserWriteCallback {
19
+ public:
20
+ virtual ~UserWriteCallback() {}
21
+
22
+ // This function will be called after the write is enqueued.
23
+ virtual void OnWriteEnqueued() = 0;
24
+
25
+ // This function will be called after wal write finishes if it applies.
26
+ virtual void OnWalWriteFinish() = 0;
27
+ };
28
+
29
+ } // namespace ROCKSDB_NAMESPACE
@@ -32,8 +32,10 @@ class Checkpoint {
32
32
  // same filesystem as the database, and copied otherwise.
33
33
  // (2) other required files (like MANIFEST) are always copied.
34
34
  // log_size_for_flush: if the total log file size is equal or larger than
35
- // this value, then a flush is triggered for all the column families. The
36
- // default value is 0, which means flush is always triggered. If you move
35
+ // this value, then a flush is triggered for all the column families.
36
+ // The archived log size will not be included when calculating the total log
37
+ // size.
38
+ // The default value is 0, which means flush is always triggered. If you move
37
39
  // away from the default, the checkpoint may not contain up-to-date data
38
40
  // if WAL writing is not always enabled.
39
41
  // Flush will always trigger if it is 2PC.
@@ -16,7 +16,6 @@
16
16
  #include <memory>
17
17
  #include <unordered_map>
18
18
 
19
- #include "options/configurable_helper.h"
20
19
  #include "rocksdb/convenience.h"
21
20
  #include "rocksdb/customizable.h"
22
21
  #include "rocksdb/status.h"
@@ -35,6 +35,7 @@ class LDBCommand {
35
35
  static const std::string ARG_DB;
36
36
  static const std::string ARG_PATH;
37
37
  static const std::string ARG_SECONDARY_PATH;
38
+ static const std::string ARG_LEADER_PATH;
38
39
  static const std::string ARG_HEX;
39
40
  static const std::string ARG_KEY_HEX;
40
41
  static const std::string ARG_VALUE_HEX;
@@ -83,6 +84,10 @@ class LDBCommand {
83
84
 
84
85
  static LDBCommand* SelectCommand(const ParsedParams& parsed_parms);
85
86
 
87
+ static void ParseSingleParam(const std::string& param,
88
+ ParsedParams& parsed_params,
89
+ std::vector<std::string>& cmd_tokens);
90
+
86
91
  static LDBCommand* InitFromCmdLineArgs(
87
92
  const std::vector<std::string>& args, const Options& options,
88
93
  const LDBOptions& ldb_options,
@@ -156,10 +161,12 @@ class LDBCommand {
156
161
  // with this secondary path. When running against a database opened by
157
162
  // another process, ldb will leave the source directory completely intact.
158
163
  std::string secondary_path_;
164
+ std::string leader_path_;
159
165
  std::string column_family_name_;
160
166
  DB* db_;
161
167
  DBWithTTL* db_ttl_;
162
168
  std::map<std::string, ColumnFamilyHandle*> cf_handles_;
169
+ std::map<uint32_t, const Comparator*> ucmps_;
163
170
 
164
171
  /**
165
172
  * true implies that this command can work if the db is opened in read-only
@@ -224,17 +231,19 @@ class LDBCommand {
224
231
  ColumnFamilyHandle* GetCfHandle();
225
232
 
226
233
  static std::string PrintKeyValue(const std::string& key,
234
+ const std::string& timestamp,
227
235
  const std::string& value, bool is_key_hex,
228
- bool is_value_hex);
236
+ bool is_value_hex, const Comparator* ucmp);
229
237
 
230
238
  static std::string PrintKeyValue(const std::string& key,
231
- const std::string& value, bool is_hex);
232
-
233
- static std::string PrintKeyValueOrWideColumns(const Slice& key,
234
- const Slice& value,
235
- const WideColumns& wide_columns,
236
- bool is_key_hex,
237
- bool is_value_hex);
239
+ const std::string& timestamp,
240
+ const std::string& value, bool is_hex,
241
+ const Comparator* ucmp);
242
+
243
+ static std::string PrintKeyValueOrWideColumns(
244
+ const Slice& key, const Slice& timestamp, const Slice& value,
245
+ const WideColumns& wide_columns, bool is_key_hex, bool is_value_hex,
246
+ const Comparator* ucmp);
238
247
 
239
248
  /**
240
249
  * Return true if the specified flag is present in the specified flags vector
@@ -501,12 +501,12 @@ class StackableDB : public DB {
501
501
  return db_->GetFullHistoryTsLow(column_family, ts_low);
502
502
  }
503
503
 
504
- Status GetSortedWalFiles(VectorLogPtr& files) override {
504
+ Status GetSortedWalFiles(VectorWalPtr& files) override {
505
505
  return db_->GetSortedWalFiles(files);
506
506
  }
507
507
 
508
508
  Status GetCurrentWalFile(
509
- std::unique_ptr<LogFile>* current_log_file) override {
509
+ std::unique_ptr<WalFile>* current_log_file) override {
510
510
  return db_->GetCurrentWalFile(current_log_file);
511
511
  }
512
512
 
@@ -84,4 +84,50 @@ std::shared_ptr<CompactOnDeletionCollectorFactory>
84
84
  NewCompactOnDeletionCollectorFactory(size_t sliding_window_size,
85
85
  size_t deletion_trigger,
86
86
  double deletion_ratio = 0);
87
+
88
+ // A factory of a table property collector that marks a SST file as
89
+ // need-compaction when, for the tiering use case, it observes, among all the
90
+ // data entries, the ratio of entries that are already eligible to be placed on
91
+ // the last level but are not yet on the last level is equal to or higher than
92
+ // the configured `compaction_trigger_ratio_`.
93
+ // 1) Setting the ratio to be equal to or smaller than 0 disables this collector
94
+ // 2) Setting the ratio to be within (0, 1] will write the number of
95
+ // observed eligible entries into a user property and marks a file as
96
+ // need-compaction when aforementioned condition is met.
97
+ // 3) Setting the ratio to be higher than 1 can be used to just write the user
98
+ // table property, and not mark any file as need compaction.
99
+ // For a column family that does not enable tiering feature, even if an
100
+ // effective configuration is provided, this collector is still disabled.
101
+ class CompactForTieringCollectorFactory
102
+ : public TablePropertiesCollectorFactory {
103
+ public:
104
+ // @param compaction_trigger_ratio: the triggering threshold for the ratio of
105
+ // eligible entries to the total number of entries. See class documentation
106
+ // for what entry is eligible.
107
+ CompactForTieringCollectorFactory(double compaction_trigger_ratio);
108
+
109
+ ~CompactForTieringCollectorFactory() {}
110
+
111
+ TablePropertiesCollector* CreateTablePropertiesCollector(
112
+ TablePropertiesCollectorFactory::Context context) override;
113
+
114
+ void SetCompactionTriggerRatio(double new_ratio) {
115
+ compaction_trigger_ratio_.store(new_ratio);
116
+ }
117
+
118
+ double GetCompactionTriggerRatio() const {
119
+ return compaction_trigger_ratio_.load();
120
+ }
121
+
122
+ static const char* kClassName() { return "CompactForTieringCollector"; }
123
+ const char* Name() const override { return kClassName(); }
124
+
125
+ std::string ToString() const override;
126
+
127
+ private:
128
+ std::atomic<double> compaction_trigger_ratio_;
129
+ };
130
+
131
+ std::shared_ptr<CompactForTieringCollectorFactory>
132
+ NewCompactForTieringCollectorFactory(double compaction_trigger_ratio);
87
133
  } // namespace ROCKSDB_NAMESPACE
@@ -443,6 +443,13 @@ class Transaction {
443
443
  }
444
444
  }
445
445
 
446
+ virtual Status GetEntityForUpdate(const ReadOptions& read_options,
447
+ ColumnFamilyHandle* column_family,
448
+ const Slice& key,
449
+ PinnableWideColumns* columns,
450
+ bool exclusive = true,
451
+ bool do_validate = true) = 0;
452
+
446
453
  virtual std::vector<Status> MultiGetForUpdate(
447
454
  const ReadOptions& options,
448
455
  const std::vector<ColumnFamilyHandle*>& column_family,
@@ -12,8 +12,8 @@
12
12
  // NOTE: in 'main' development branch, this should be the *next*
13
13
  // minor or major version number planned for release.
14
14
  #define ROCKSDB_MAJOR 9
15
- #define ROCKSDB_MINOR 3
16
- #define ROCKSDB_PATCH 1
15
+ #define ROCKSDB_MINOR 5
16
+ #define ROCKSDB_PATCH 0
17
17
 
18
18
  // Do not use these. We made the mistake of declaring macros starting with
19
19
  // double underscore. Now we have to live with our choice. We'll deprecate these
@@ -239,6 +239,10 @@ static std::unordered_map<std::string, OptionTypeInfo>
239
239
  {offsetof(class CompactionOptionsUniversal, compression_size_percent),
240
240
  OptionType::kInt, OptionVerificationType::kNormal,
241
241
  OptionTypeFlags::kMutable}},
242
+ {"max_read_amp",
243
+ {offsetof(class CompactionOptionsUniversal, max_read_amp),
244
+ OptionType::kInt, OptionVerificationType::kNormal,
245
+ OptionTypeFlags::kMutable}},
242
246
  {"stop_style",
243
247
  {offsetof(class CompactionOptionsUniversal, stop_style),
244
248
  OptionType::kCompactionStopStyle, OptionVerificationType::kNormal,
@@ -519,6 +523,10 @@ static std::unordered_map<std::string, OptionTypeInfo>
519
523
  {offsetof(struct MutableCFOptions, bottommost_file_compaction_delay),
520
524
  OptionType::kUInt32T, OptionVerificationType::kNormal,
521
525
  OptionTypeFlags::kMutable}},
526
+ {"uncache_aggressiveness",
527
+ {offsetof(struct MutableCFOptions, uncache_aggressiveness),
528
+ OptionType::kUInt32T, OptionVerificationType::kNormal,
529
+ OptionTypeFlags::kMutable}},
522
530
  {"block_protection_bytes_per_key",
523
531
  {offsetof(struct MutableCFOptions, block_protection_bytes_per_key),
524
532
  OptionType::kUInt8T, OptionVerificationType::kNormal,
@@ -1118,11 +1126,12 @@ void MutableCFOptions::Dump(Logger* log) const {
1118
1126
  report_bg_io_stats);
1119
1127
  ROCKS_LOG_INFO(log, " compression: %d",
1120
1128
  static_cast<int>(compression));
1121
- ROCKS_LOG_INFO(log,
1122
- " experimental_mempurge_threshold: %f",
1129
+ ROCKS_LOG_INFO(log, " experimental_mempurge_threshold: %f",
1123
1130
  experimental_mempurge_threshold);
1124
1131
  ROCKS_LOG_INFO(log, " bottommost_file_compaction_delay: %" PRIu32,
1125
1132
  bottommost_file_compaction_delay);
1133
+ ROCKS_LOG_INFO(log, " uncache_aggressiveness: %" PRIu32,
1134
+ uncache_aggressiveness);
1126
1135
 
1127
1136
  // Universal Compaction Options
1128
1137
  ROCKS_LOG_INFO(log, "compaction_options_universal.size_ratio : %d",
@@ -1137,6 +1146,8 @@ void MutableCFOptions::Dump(Logger* log) const {
1137
1146
  ROCKS_LOG_INFO(log,
1138
1147
  "compaction_options_universal.compression_size_percent : %d",
1139
1148
  compaction_options_universal.compression_size_percent);
1149
+ ROCKS_LOG_INFO(log, "compaction_options_universal.max_read_amp: %d",
1150
+ compaction_options_universal.max_read_amp);
1140
1151
  ROCKS_LOG_INFO(log, "compaction_options_universal.stop_style : %d",
1141
1152
  compaction_options_universal.stop_style);
1142
1153
  ROCKS_LOG_INFO(