@nxtedition/rocksdb 8.2.0 → 8.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (321)
  1. package/binding.cc +3 -3
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +16 -52
  3. package/deps/rocksdb/rocksdb/Makefile +10 -5
  4. package/deps/rocksdb/rocksdb/TARGETS +8 -345
  5. package/deps/rocksdb/rocksdb/cache/cache_test.cc +92 -0
  6. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +32 -32
  7. package/deps/rocksdb/rocksdb/cache/clock_cache.h +12 -9
  8. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +6 -43
  9. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +3 -13
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +8 -5
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +21 -47
  12. package/deps/rocksdb/rocksdb/cache/lru_cache.h +3 -8
  13. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +2 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +1 -2
  15. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +44 -7
  16. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +13 -14
  17. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +1 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -0
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +2 -1
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +17 -8
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +40 -21
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
  24. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +41 -42
  25. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +1 -1
  26. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +1 -1
  27. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +5 -4
  28. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +2 -2
  29. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +5 -3
  30. package/deps/rocksdb/rocksdb/db/builder.cc +7 -6
  31. package/deps/rocksdb/rocksdb/db/builder.h +2 -2
  32. package/deps/rocksdb/rocksdb/db/c.cc +76 -5
  33. package/deps/rocksdb/rocksdb/db/c_test.c +141 -0
  34. package/deps/rocksdb/rocksdb/db/column_family.cc +32 -0
  35. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +3 -2
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +5 -0
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -5
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +12 -10
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +21 -17
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -2
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +8 -7
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +3 -1
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +1 -1
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +77 -50
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +4 -5
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +55 -8
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +142 -56
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +1 -1
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +1 -2
  50. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +21 -20
  51. package/deps/rocksdb/rocksdb/db/convenience.cc +8 -6
  52. package/deps/rocksdb/rocksdb/db/corruption_test.cc +5 -4
  53. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +6 -3
  54. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +260 -220
  55. package/deps/rocksdb/rocksdb/db/db_clip_test.cc +142 -0
  56. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +1 -1
  57. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +333 -27
  58. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +5 -0
  59. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +7 -0
  60. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +189 -27
  61. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +23 -10
  62. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +134 -90
  63. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +2 -2
  64. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +5 -3
  65. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -1
  66. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +124 -16
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +10 -0
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +7 -0
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +15 -0
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +11 -5
  71. package/deps/rocksdb/rocksdb/db/db_iter.cc +7 -8
  72. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +54 -3
  73. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +42 -0
  74. package/deps/rocksdb/rocksdb/db/db_options_test.cc +116 -1
  75. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +3 -2
  76. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +3 -2
  77. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +9 -8
  78. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +142 -63
  79. package/deps/rocksdb/rocksdb/db/db_test.cc +28 -7
  80. package/deps/rocksdb/rocksdb/db/db_test2.cc +71 -131
  81. package/deps/rocksdb/rocksdb/db/db_test_util.cc +18 -0
  82. package/deps/rocksdb/rocksdb/db/db_test_util.h +6 -0
  83. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +10 -10
  84. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +25 -0
  85. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +88 -0
  86. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +67 -0
  87. package/deps/rocksdb/rocksdb/db/db_write_test.cc +5 -0
  88. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
  89. package/deps/rocksdb/rocksdb/db/experimental.cc +4 -2
  90. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +86 -1
  91. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +15 -2
  92. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +1 -2
  93. package/deps/rocksdb/rocksdb/db/flush_job.cc +21 -14
  94. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +14 -7
  95. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +31 -8
  96. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +21 -19
  97. package/deps/rocksdb/rocksdb/db/internal_stats.cc +42 -12
  98. package/deps/rocksdb/rocksdb/db/internal_stats.h +1 -0
  99. package/deps/rocksdb/rocksdb/db/kv_checksum.h +92 -6
  100. package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -2
  101. package/deps/rocksdb/rocksdb/db/log_format.h +8 -4
  102. package/deps/rocksdb/rocksdb/db/log_reader.cc +129 -51
  103. package/deps/rocksdb/rocksdb/db/log_reader.h +16 -0
  104. package/deps/rocksdb/rocksdb/db/log_test.cc +125 -4
  105. package/deps/rocksdb/rocksdb/db/log_writer.cc +32 -2
  106. package/deps/rocksdb/rocksdb/db/log_writer.h +16 -0
  107. package/deps/rocksdb/rocksdb/db/memtable.cc +17 -46
  108. package/deps/rocksdb/rocksdb/db/memtable.h +1 -1
  109. package/deps/rocksdb/rocksdb/db/memtable_list.cc +8 -4
  110. package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -1
  111. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -1
  112. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +5 -4
  113. package/deps/rocksdb/rocksdb/db/repair.cc +38 -11
  114. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +3 -3
  115. package/deps/rocksdb/rocksdb/db/table_cache.cc +68 -51
  116. package/deps/rocksdb/rocksdb/db/table_cache.h +20 -10
  117. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -1
  118. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +6 -3
  119. package/deps/rocksdb/rocksdb/db/version_builder.cc +9 -5
  120. package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
  121. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +140 -120
  122. package/deps/rocksdb/rocksdb/db/version_edit.cc +14 -0
  123. package/deps/rocksdb/rocksdb/db/version_edit.h +12 -4
  124. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +21 -13
  125. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +26 -16
  126. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +9 -9
  127. package/deps/rocksdb/rocksdb/db/version_set.cc +292 -96
  128. package/deps/rocksdb/rocksdb/db/version_set.h +53 -28
  129. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -0
  130. package/deps/rocksdb/rocksdb/db/version_set_test.cc +62 -22
  131. package/deps/rocksdb/rocksdb/db/version_util.h +5 -4
  132. package/deps/rocksdb/rocksdb/db/write_batch.cc +3 -1
  133. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
  134. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +119 -27
  135. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +123 -0
  136. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  137. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +7 -2
  138. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +34 -0
  139. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +13 -0
  140. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +43 -33
  141. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +29 -17
  142. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
  143. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +6 -1
  144. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +85 -50
  145. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +96 -54
  146. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.cc +122 -0
  147. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +206 -0
  148. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +9 -1
  149. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +9 -3
  150. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +322 -92
  151. package/deps/rocksdb/rocksdb/env/env_posix.cc +12 -8
  152. package/deps/rocksdb/rocksdb/env/env_test.cc +31 -0
  153. package/deps/rocksdb/rocksdb/env/mock_env.cc +1 -1
  154. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +14 -0
  155. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -1
  156. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +5 -1
  157. package/deps/rocksdb/rocksdb/file/file_util.cc +3 -3
  158. package/deps/rocksdb/rocksdb/file/file_util.h +2 -0
  159. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +89 -0
  160. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +22 -7
  161. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -2
  162. package/deps/rocksdb/rocksdb/file/readahead_raf.cc +1 -1
  163. package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +1 -1
  164. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +1 -1
  165. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +3 -0
  166. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +154 -74
  167. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +27 -7
  168. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +107 -28
  169. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +19 -0
  170. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +8 -0
  171. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +2 -0
  172. package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +7 -1
  173. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +137 -152
  174. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +61 -26
  175. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +30 -26
  176. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +33 -16
  177. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +87 -8
  178. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +1 -1
  179. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +5 -0
  180. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
  181. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +1 -0
  182. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -0
  183. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +0 -1
  184. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  185. package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +9 -2
  186. package/deps/rocksdb/rocksdb/logging/env_logger.h +2 -0
  187. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +78 -42
  188. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +14 -9
  189. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +1 -0
  190. package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -0
  191. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +4 -9
  192. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +19 -11
  193. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +1 -1
  194. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +211 -555
  195. package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +1 -1
  196. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +36 -2
  197. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +17 -7
  198. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.h +10 -7
  199. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +19 -18
  200. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.h +10 -2
  201. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +14 -0
  202. package/deps/rocksdb/rocksdb/options/cf_options.cc +35 -2
  203. package/deps/rocksdb/rocksdb/options/cf_options.h +5 -0
  204. package/deps/rocksdb/rocksdb/options/customizable_test.cc +1 -1
  205. package/deps/rocksdb/rocksdb/options/options.cc +12 -53
  206. package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -0
  207. package/deps/rocksdb/rocksdb/options/options_parser.cc +11 -0
  208. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +32 -4
  209. package/deps/rocksdb/rocksdb/options/options_test.cc +89 -5
  210. package/deps/rocksdb/rocksdb/port/lang.h +27 -0
  211. package/deps/rocksdb/rocksdb/port/stack_trace.cc +67 -24
  212. package/deps/rocksdb/rocksdb/src.mk +2 -0
  213. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -3
  214. package/deps/rocksdb/rocksdb/table/block_based/block.cc +195 -35
  215. package/deps/rocksdb/rocksdb/table/block_based/block.h +197 -24
  216. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +71 -51
  217. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +7 -1
  218. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +4 -6
  219. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +3 -0
  220. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +43 -2
  221. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +36 -6
  222. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +266 -166
  223. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +44 -14
  224. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -1
  225. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +63 -56
  226. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +8 -2
  227. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +4 -2
  228. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +10 -0
  229. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +14 -2
  230. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +918 -2
  231. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -2
  232. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +10 -9
  233. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -8
  234. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +2 -2
  235. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +1 -1
  236. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +18 -23
  237. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +8 -8
  238. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +16 -32
  239. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +7 -8
  240. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +4 -5
  241. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +3 -3
  242. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +46 -53
  243. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +12 -12
  244. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +7 -9
  245. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +26 -23
  246. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +2 -1
  247. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +3 -0
  248. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +4 -2
  249. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +3 -2
  250. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +7 -1
  251. package/deps/rocksdb/rocksdb/table/block_fetcher.h +1 -1
  252. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
  253. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +3 -2
  254. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +5 -2
  255. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +4 -2
  256. package/deps/rocksdb/rocksdb/table/format.cc +4 -4
  257. package/deps/rocksdb/rocksdb/table/format.h +1 -1
  258. package/deps/rocksdb/rocksdb/table/get_context.cc +1 -1
  259. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +33 -22
  260. package/deps/rocksdb/rocksdb/table/meta_blocks.h +4 -0
  261. package/deps/rocksdb/rocksdb/table/mock_table.cc +4 -2
  262. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +1 -1
  263. package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +1 -1
  264. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +18 -10
  265. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +4 -3
  266. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +10 -7
  267. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +4 -2
  268. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +11 -0
  269. package/deps/rocksdb/rocksdb/table/table_builder.h +14 -5
  270. package/deps/rocksdb/rocksdb/table/table_properties.cc +2 -0
  271. package/deps/rocksdb/rocksdb/table/table_reader.h +6 -3
  272. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +1 -1
  273. package/deps/rocksdb/rocksdb/table/table_test.cc +291 -34
  274. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +3 -1
  275. package/deps/rocksdb/rocksdb/test_util/testharness.h +5 -0
  276. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -2
  277. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +33 -17
  278. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +3 -1
  279. package/deps/rocksdb/rocksdb/util/bloom_impl.h +2 -2
  280. package/deps/rocksdb/rocksdb/util/compression.h +1 -1
  281. package/deps/rocksdb/rocksdb/util/crc32c.cc +24 -83
  282. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +7 -9
  283. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +4 -1
  284. package/deps/rocksdb/rocksdb/util/filter_bench.cc +1 -1
  285. package/deps/rocksdb/rocksdb/util/gflags_compat.h +9 -10
  286. package/deps/rocksdb/rocksdb/util/math.h +12 -7
  287. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +16 -18
  288. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +46 -2
  289. package/deps/rocksdb/rocksdb/util/ribbon_test.cc +6 -6
  290. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +12 -7
  291. package/deps/rocksdb/rocksdb/util/stop_watch.h +31 -13
  292. package/deps/rocksdb/rocksdb/util/thread_list_test.cc +2 -0
  293. package/deps/rocksdb/rocksdb/util/thread_operation.h +2 -1
  294. package/deps/rocksdb/rocksdb/util/udt_util.h +77 -0
  295. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.cc +2 -2
  296. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +1 -1
  297. package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.cc +1 -1
  298. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +1 -1
  299. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -1
  300. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -1
  301. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +11 -1
  302. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +34 -1
  303. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +15 -0
  304. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +1 -1
  305. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +5 -1
  306. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +29 -1
  307. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +0 -1
  308. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +0 -1
  309. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +6 -1
  310. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +10 -0
  311. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +6 -1
  312. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +5 -0
  313. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +5 -0
  314. package/package.json +1 -1
  315. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  316. package/prebuilds/linux-x64/node.napi.node +0 -0
  317. /package/deps/rocksdb/rocksdb/memory/{memory_allocator.h → memory_allocator_impl.h} +0 -0
  318. /package/deps/rocksdb/rocksdb/monitoring/{statistics.h → statistics_impl.h} +0 -0
  319. /package/deps/rocksdb/rocksdb/table/block_based/{flush_block_policy.h → flush_block_policy_impl.h} +0 -0
  320. /package/deps/rocksdb/rocksdb/util/{rate_limiter.h → rate_limiter_impl.h} +0 -0
  321. /package/deps/rocksdb/rocksdb/utilities/agg_merge/{agg_merge.h → agg_merge_impl.h} +0 -0
@@ -59,43 +59,30 @@ enum CompactionPri : char {
59
59
  kRoundRobin = 0x4,
60
60
  };
61
61
 
62
- struct CompactionOptionsFIFO {
63
- // once the total sum of table files reaches this, we will delete the oldest
64
- // table file
65
- // Default: 1GB
66
- uint64_t max_table_files_size;
67
-
68
- // If true, try to do compaction to compact smaller files into larger ones.
69
- // Minimum files to compact follows options.level0_file_num_compaction_trigger
70
- // and compaction won't trigger if average compact bytes per del file is
71
- // larger than options.write_buffer_size. This is to protect large files
72
- // from being compacted again.
73
- // Default: false;
74
- bool allow_compaction = false;
75
-
76
- // When not 0, if the data in the file is older than this threshold, RocksDB
77
- // will soon move the file to warm temperature.
78
- uint64_t age_for_warm = 0;
79
-
80
- CompactionOptionsFIFO() : max_table_files_size(1 * 1024 * 1024 * 1024) {}
81
- CompactionOptionsFIFO(uint64_t _max_table_files_size, bool _allow_compaction)
82
- : max_table_files_size(_max_table_files_size),
83
- allow_compaction(_allow_compaction) {}
84
- };
85
-
86
62
  // Compression options for different compression algorithms like Zlib
87
63
  struct CompressionOptions {
64
+ // ==> BEGIN options that can be set by deprecated configuration syntax, <==
65
+ // ==> e.g. compression_opts=5:6:7:8:9:10:true:11:false <==
66
+ // ==> Please use compression_opts={level=6;strategy=7;} form instead. <==
67
+
88
68
  // RocksDB's generic default compression level. Internally it'll be translated
89
69
  // to the default compression level specific to the library being used (see
90
70
  // comment above `ColumnFamilyOptions::compression`).
91
71
  //
92
72
  // The default value is the max 16-bit int as it'll be written out in OPTIONS
93
73
  // file, which should be portable.
94
- const static int kDefaultCompressionLevel = 32767;
74
+ static constexpr int kDefaultCompressionLevel = 32767;
75
+
76
+ // zlib only: windowBits parameter. See https://www.zlib.net/manual.html
77
+ int window_bits = -14;
95
78
 
96
- int window_bits;
97
- int level;
98
- int strategy;
79
+ // Compression "level" applicable to zstd, zlib, LZ4. Except for
80
+ // kDefaultCompressionLevel (see above), the meaning of each value depends
81
+ // on the compression algorithm.
82
+ int level = kDefaultCompressionLevel;
83
+
84
+ // zlib only: strategy parameter. See https://www.zlib.net/manual.html
85
+ int strategy = 0;
99
86
 
100
87
  // Maximum size of dictionaries used to prime the compression library.
101
88
  // Enabling dictionary can improve compression ratios when there are
@@ -117,18 +104,14 @@ struct CompressionOptions {
117
104
  // If block cache insertion fails with `Status::MemoryLimit` (i.e., it is
118
105
  // full), we finalize the dictionary with whatever data we have and then stop
119
106
  // buffering.
120
- //
121
- // Default: 0.
122
- uint32_t max_dict_bytes;
107
+ uint32_t max_dict_bytes = 0;
123
108
 
124
109
  // Maximum size of training data passed to zstd's dictionary trainer. Using
125
110
  // zstd's dictionary trainer can achieve even better compression ratio
126
111
  // improvements than using `max_dict_bytes` alone.
127
112
  //
128
113
  // The training data will be used to generate a dictionary of max_dict_bytes.
129
- //
130
- // Default: 0.
131
- uint32_t zstd_max_train_bytes;
114
+ uint32_t zstd_max_train_bytes = 0;
132
115
 
133
116
  // Number of threads for parallel compression.
134
117
  // Parallel compression is enabled only if threads > 1.
@@ -141,9 +124,7 @@ struct CompressionOptions {
141
124
  // compressed size is in flight when compression is parallelized. To be
142
125
  // reasonably accurate, this inflation is also estimated by using historical
143
126
  // compression ratio and current bytes inflight.
144
- //
145
- // Default: 1.
146
- uint32_t parallel_threads;
127
+ uint32_t parallel_threads = 1;
147
128
 
148
129
  // When the compression options are set by the user, it will be set to "true".
149
130
  // For bottommost_compression_opts, to enable it, user must set enabled=true.
@@ -152,9 +133,7 @@ struct CompressionOptions {
152
133
  //
153
134
  // For compression_opts, if compression_opts.enabled=false, it is still
154
135
  // used as compression options for compression process.
155
- //
156
- // Default: false.
157
- bool enabled;
136
+ bool enabled = false;
158
137
 
159
138
  // Limit on data buffering when gathering samples to build a dictionary. Zero
160
139
  // means no limit. When dictionary is disabled (`max_dict_bytes == 0`),
@@ -173,9 +152,7 @@ struct CompressionOptions {
173
152
  // `zstd_max_train_bytes` (when enabled) can restrict how many samples we can
174
153
  // pass to the dictionary trainer. Configuring it below `max_dict_bytes` can
175
154
  // restrict the size of the final dictionary.
176
- //
177
- // Default: 0 (unlimited)
178
- uint64_t max_dict_buffer_bytes;
155
+ uint64_t max_dict_buffer_bytes = 0;
179
156
 
180
157
  // Use zstd trainer to generate dictionaries. When this option is set to true,
181
158
  // zstd_max_train_bytes of training data sampled from max_dict_buffer_bytes
@@ -187,34 +164,29 @@ struct CompressionOptions {
187
164
  // data will be passed to this API. Using this API should save CPU time on
188
165
  // dictionary training, but the compression ratio may not be as good as using
189
166
  // a dictionary trainer.
190
- //
191
- // Default: true
192
- bool use_zstd_dict_trainer;
193
-
194
- CompressionOptions()
195
- : window_bits(-14),
196
- level(kDefaultCompressionLevel),
197
- strategy(0),
198
- max_dict_bytes(0),
199
- zstd_max_train_bytes(0),
200
- parallel_threads(1),
201
- enabled(false),
202
- max_dict_buffer_bytes(0),
203
- use_zstd_dict_trainer(true) {}
204
- CompressionOptions(int wbits, int _lev, int _strategy,
205
- uint32_t _max_dict_bytes, uint32_t _zstd_max_train_bytes,
206
- uint32_t _parallel_threads, bool _enabled,
207
- uint64_t _max_dict_buffer_bytes,
208
- bool _use_zstd_dict_trainer)
209
- : window_bits(wbits),
210
- level(_lev),
211
- strategy(_strategy),
212
- max_dict_bytes(_max_dict_bytes),
213
- zstd_max_train_bytes(_zstd_max_train_bytes),
214
- parallel_threads(_parallel_threads),
215
- enabled(_enabled),
216
- max_dict_buffer_bytes(_max_dict_buffer_bytes),
217
- use_zstd_dict_trainer(_use_zstd_dict_trainer) {}
167
+ bool use_zstd_dict_trainer = true;
168
+
169
+ // ===> END options that can be set by deprecated configuration syntax <===
170
+ // ===> Use compression_opts={level=6;strategy=7;} form for below opts <===
171
+
172
+ // Essentially specifies a minimum acceptable compression ratio. A block is
173
+ // stored uncompressed if the compressed block does not achieve this ratio,
174
+ // because the downstream cost of decompression is not considered worth such
175
+ // a small savings (if any).
176
+ // However, the ratio is specified in a way that is efficient for checking.
177
+ // An integer from 1 to 1024 indicates the maximum allowable compressed bytes
178
+ // per 1KB of input, so the minimum acceptable ratio is 1024.0 / this value.
179
+ // For example, for a minimum ratio of 1.5:1, set to 683. See SetMinRatio().
180
+ // Default: abandon use of compression for a specific block or entry if
181
+ // compressed by less than 12.5% (minimum ratio of 1.143:1).
182
+ int max_compressed_bytes_per_kb = 1024 * 7 / 8;
183
+
184
+ // A convenience function for setting max_compressed_bytes_per_kb based on a
185
+ // minimum acceptable compression ratio (uncompressed size over compressed
186
+ // size).
187
+ void SetMinRatio(double min_ratio) {
188
+ max_compressed_bytes_per_kb = static_cast<int>(1024.0 / min_ratio + 0.5);
189
+ }
218
190
  };
219
191
 
220
192
  // Temperature of a file. Used to pass to FileSystem for a different
@@ -229,6 +201,60 @@ enum class Temperature : uint8_t {
229
201
  kLastTemperature,
230
202
  };
231
203
 
204
+ struct FileTemperatureAge {
205
+ Temperature temperature = Temperature::kUnknown;
206
+ uint64_t age = 0;
207
+ };
208
+
209
+ struct CompactionOptionsFIFO {
210
+ // once the total sum of table files reaches this, we will delete the oldest
211
+ // table file
212
+ // Default: 1GB
213
+ uint64_t max_table_files_size;
214
+
215
+ // If true, try to do compaction to compact smaller files into larger ones.
216
+ // Minimum files to compact follows options.level0_file_num_compaction_trigger
217
+ // and compaction won't trigger if average compact bytes per del file is
218
+ // larger than options.write_buffer_size. This is to protect large files
219
+ // from being compacted again.
220
+ // Default: false;
221
+ bool allow_compaction = false;
222
+
223
+ // DEPRECATED
224
+ // When not 0, if the data in the file is older than this threshold, RocksDB
225
+ // will soon move the file to warm temperature.
226
+ uint64_t age_for_warm = 0;
227
+
228
+ // EXPERIMENTAL
229
+ // Age (in seconds) threshold for different file temperatures.
230
+ // When not empty, each element specifies an age threshold `age` and a
231
+ // temperature such that if all the data in a file is older than `age`,
232
+ // RocksDB will compact the file to the specified `temperature`.
233
+ //
234
+ // Note:
235
+ // - Flushed files will always have temperature kUnknown.
236
+ // - Compaction output files will have temperature kUnknown by default, so
237
+ // only temperatures other than kUnknown need to be specified.
238
+ // - The elements should be in increasing order with respect to `age` field.
239
+ //
240
+ // Dynamically changeable through SetOptions() API, e.g.,
241
+ // SetOptions("compaction_options_fifo",
242
+ // "{file_temperature_age_thresholds={
243
+ // {age=10;temperature=kWarm}:{age=20;temperature=kCold}}}")
244
+ // In this example, all files that are at least 20 seconds old will be
245
+ // compacted and output files will have temperature kCold. All files that are
246
+ // at least 10 seconds old but younger than 20 seconds will be compacted to
247
+ // files with temperature kWarm.
248
+ //
249
+ // Default: empty
250
+ std::vector<FileTemperatureAge> file_temperature_age_thresholds{};
251
+
252
+ CompactionOptionsFIFO() : max_table_files_size(1 * 1024 * 1024 * 1024) {}
253
+ CompactionOptionsFIFO(uint64_t _max_table_files_size, bool _allow_compaction)
254
+ : max_table_files_size(_max_table_files_size),
255
+ allow_compaction(_allow_compaction) {}
256
+ };
257
+
232
258
  // The control option of how the cache tiers will be used. Currently rocksdb
233
259
  // supports block cache (volatile tier), secondary cache (non-volatile tier).
234
260
  // In the future, we may add more caching layers.
@@ -592,7 +618,7 @@ struct AdvancedColumnFamilyOptions {
592
618
  // and max_bytes_for_level_base=10MB.
593
619
  // Target sizes of level 1 to 5 start with:
594
620
  // [- - - - 10MB]
595
- // with base level is level. Target sizes of level 1 to 4 are not applicable
621
+ // with base level being level 5. Target sizes of level 1 to 4 are not applicable
596
622
  // because they will not be used.
597
623
  // Until the size of Level 5 grows to more than 10MB, say 11MB, we make
598
624
  // base target to level 4 and now the targets look like:
@@ -642,8 +668,31 @@ struct AdvancedColumnFamilyOptions {
642
668
  //
643
669
  // max_bytes_for_level_multiplier_additional is ignored with this flag on.
644
670
  //
645
- // Turning this feature on or off for an existing DB can cause unexpected
646
- // LSM tree structure so it's not recommended.
671
+ // To make the migration easier, when turning this feature on, files in the
672
+ // LSM will be trivially moved down to fill the LSM starting from the
673
+ // bottommost level during DB open. For example, if the LSM looks like:
674
+ // L0: f0, f1
675
+ // L1: f2, f3
676
+ // L2: f4
677
+ // L3:
678
+ // L4: f5
679
+ // and the DB is opened with num_levels = 7 with this feature turned on,
680
+ // new LSM after DB open looks like the following:
681
+ // L0: f0, f1, (and possibly data flushed from WAL)
682
+ // L4: f2, f3
683
+ // L5: f4
684
+ // L6: f5
685
+ //
686
+ // If `allow_ingest_behind=true` or `preclude_last_level_data_seconds > 0`,
687
+ // then the last level is reserved, and we will start filling LSM from the
688
+ // second last level (L5 in the above example).
689
+ //
690
+ // Note that there may be excessive levels (where target level size is 0 when
691
+ // computed based on this feature) in the LSM after a user migrates to turn
692
+ // this feature on. This is especially likely when a user migrates from
693
+ // leveled compaction with a smaller multiplier or from universal compaction.
694
+ // RocksDB will gradually drain these unnecessary levels by compacting files
695
+ // down the LSM.
647
696
  //
648
697
  // Default: false
649
698
  bool level_compaction_dynamic_level_bytes = false;
@@ -1086,6 +1135,37 @@ struct AdvancedColumnFamilyOptions {
1086
1135
  // Supported values: 0, 1, 2, 4, 8.
1087
1136
  uint32_t memtable_protection_bytes_per_key = 0;
1088
1137
 
1138
+ // UNDER CONSTRUCTION -- DO NOT USE
1139
+ // When the user-defined timestamp feature is enabled, this flag controls
1140
+ // whether the user-defined timestamps will be persisted.
1141
+ //
1142
+ // When it's false, the user-defined timestamps will be removed from the user
1143
+ // keys when data is flushed from memtables to SST files. Other places that
1144
+ // user keys can be persisted like WAL and blob files go through a similar
1145
+ // process. Users should call `DB::IncreaseFullHistoryTsLow` to set a cutoff
1146
+ // timestamp. RocksDB refrains from flushing a memtable with data still above
1147
+ // the cutoff timestamp with best effort. When users try to read below the
1148
+ // cutoff timestamp, an error will be returned.
1149
+ //
1150
+ // Default: true (user-defined timestamps are persisted)
1151
+ // Not dynamically changeable; changing it requires a DB restart and
1152
+ // only compatible changes are allowed.
1153
+ bool persist_user_defined_timestamps = true;
1154
+
1155
+ // Enable/disable per key-value checksum protection for in memory blocks.
1156
+ //
1157
+ // Checksum is constructed when a block is loaded into memory and verification
1158
+ // is done for each key read from the block. This is useful for detecting
1159
+ // in-memory data corruption. Note that this feature has a non-trivial
1160
+ // negative impact on read performance. Different values of the
1161
+ // option have similar performance impact, but different memory cost and
1162
+ // corruption detection probability (e.g. 1 byte gives 255/256 chance for
1163
+ // detecting a corruption).
1164
+ //
1165
+ // Default: 0 (no protection)
1166
+ // Supported values: 0, 1, 2, 4, 8.
1167
+ uint8_t block_protection_bytes_per_key = 0;
1168
+
1089
1169
  // Create ColumnFamilyOptions with default values for all fields
1090
1170
  AdvancedColumnFamilyOptions();
1091
1171
  // Create ColumnFamilyOptions from Options
@@ -62,7 +62,6 @@
62
62
  extern "C" {
63
63
  #endif
64
64
 
65
- #include <stdarg.h>
66
65
  #include <stdbool.h>
67
66
  #include <stddef.h>
68
67
  #include <stdint.h>
@@ -1719,7 +1718,8 @@ enum {
1719
1718
  rocksdb_blob_checksum_time,
1720
1719
  rocksdb_blob_decompress_time,
1721
1720
  rocksdb_internal_range_del_reseek_count,
1722
- rocksdb_total_metric_count = 78
1721
+ rocksdb_block_read_cpu_time,
1722
+ rocksdb_total_metric_count = 79
1723
1723
  };
1724
1724
 
1725
1725
  extern ROCKSDB_LIBRARY_API void rocksdb_set_perf_level(int);
@@ -2007,20 +2007,26 @@ extern ROCKSDB_LIBRARY_API rocksdb_cache_t* rocksdb_cache_create_lru(
2007
2007
  extern ROCKSDB_LIBRARY_API rocksdb_cache_t*
2008
2008
  rocksdb_cache_create_lru_with_strict_capacity_limit(size_t capacity);
2009
2009
  extern ROCKSDB_LIBRARY_API rocksdb_cache_t* rocksdb_cache_create_lru_opts(
2010
- rocksdb_lru_cache_options_t*);
2010
+ const rocksdb_lru_cache_options_t*);
2011
+
2011
2012
  extern ROCKSDB_LIBRARY_API void rocksdb_cache_destroy(rocksdb_cache_t* cache);
2012
2013
  extern ROCKSDB_LIBRARY_API void rocksdb_cache_disown_data(
2013
2014
  rocksdb_cache_t* cache);
2014
2015
  extern ROCKSDB_LIBRARY_API void rocksdb_cache_set_capacity(
2015
2016
  rocksdb_cache_t* cache, size_t capacity);
2016
2017
  extern ROCKSDB_LIBRARY_API size_t
2017
- rocksdb_cache_get_capacity(rocksdb_cache_t* cache);
2018
+ rocksdb_cache_get_capacity(const rocksdb_cache_t* cache);
2019
+ extern ROCKSDB_LIBRARY_API size_t
2020
+ rocksdb_cache_get_usage(const rocksdb_cache_t* cache);
2021
+ extern ROCKSDB_LIBRARY_API size_t
2022
+ rocksdb_cache_get_pinned_usage(const rocksdb_cache_t* cache);
2018
2023
  extern ROCKSDB_LIBRARY_API size_t
2019
- rocksdb_cache_get_usage(rocksdb_cache_t* cache);
2024
+ rocksdb_cache_get_table_address_count(const rocksdb_cache_t* cache);
2020
2025
  extern ROCKSDB_LIBRARY_API size_t
2021
- rocksdb_cache_get_pinned_usage(rocksdb_cache_t* cache);
2026
+ rocksdb_cache_get_occupancy_count(const rocksdb_cache_t* cache);
2022
2027
 
2023
2028
  /* HyperClockCache */
2029
+
2024
2030
  extern ROCKSDB_LIBRARY_API rocksdb_hyper_clock_cache_options_t*
2025
2031
  rocksdb_hyper_clock_cache_options_create(size_t capacity,
2026
2032
  size_t estimated_entry_charge);
@@ -2041,7 +2047,8 @@ rocksdb_hyper_clock_cache_options_set_memory_allocator(
2041
2047
  extern ROCKSDB_LIBRARY_API rocksdb_cache_t* rocksdb_cache_create_hyper_clock(
2042
2048
  size_t capacity, size_t estimated_entry_charge);
2043
2049
  extern ROCKSDB_LIBRARY_API rocksdb_cache_t*
2044
- rocksdb_cache_create_hyper_clock_opts(rocksdb_hyper_clock_cache_options_t*);
2050
+ rocksdb_cache_create_hyper_clock_opts(
2051
+ const rocksdb_hyper_clock_cache_options_t*);
2045
2052
 
2046
2053
  /* DBPath */
2047
2054
 
@@ -2541,6 +2548,12 @@ extern ROCKSDB_LIBRARY_API void rocksdb_transaction_multi_get(
2541
2548
  const size_t* keys_list_sizes, char** values_list,
2542
2549
  size_t* values_list_sizes, char** errs);
2543
2550
 
2551
+ extern ROCKSDB_LIBRARY_API void rocksdb_transaction_multi_get_for_update(
2552
+ rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options,
2553
+ size_t num_keys, const char* const* keys_list,
2554
+ const size_t* keys_list_sizes, char** values_list,
2555
+ size_t* values_list_sizes, char** errs);
2556
+
2544
2557
  extern ROCKSDB_LIBRARY_API void rocksdb_transaction_multi_get_cf(
2545
2558
  rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options,
2546
2559
  const rocksdb_column_family_handle_t* const* column_families,
@@ -2548,6 +2561,13 @@ extern ROCKSDB_LIBRARY_API void rocksdb_transaction_multi_get_cf(
2548
2561
  const size_t* keys_list_sizes, char** values_list,
2549
2562
  size_t* values_list_sizes, char** errs);
2550
2563
 
2564
+ extern ROCKSDB_LIBRARY_API void rocksdb_transaction_multi_get_for_update_cf(
2565
+ rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options,
2566
+ const rocksdb_column_family_handle_t* const* column_families,
2567
+ size_t num_keys, const char* const* keys_list,
2568
+ const size_t* keys_list_sizes, char** values_list,
2569
+ size_t* values_list_sizes, char** errs);
2570
+
2551
2571
  extern ROCKSDB_LIBRARY_API char* rocksdb_transactiondb_get(
2552
2572
  rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options,
2553
2573
  const char* key, size_t klen, size_t* vlen, char** errptr);
@@ -25,6 +25,26 @@ class Cache; // defined in advanced_cache.h
25
25
  struct ConfigOptions;
26
26
  class SecondaryCache;
27
27
 
28
+ // These definitions begin source compatibility for a future change in which
29
+ // a specific class for block cache is split away from general caches, so that
30
+ // the block cache API can continue to become more specialized and
31
+ // customizable, including in ways incompatible with a general cache. For
32
+ // example, HyperClockCache is not usable as a general cache because it expects
33
+ // only fixed-size block cache keys, but this limitation is not yet reflected
34
+ // in the API function signatures.
35
+ // * Phase 1 (done) - Make both BlockCache and GeneralCache aliases for Cache,
36
+ // and make a factory function for general caches. Encourage users of row_cache
37
+ // (not common) to switch to the factory function for general caches.
38
+ // * Phase 2 - Split off GenericCache as its own class, removing secondary
39
+ // cache support features and more from the API to simplify it. Between Phase 1
40
+ // and Phase 2 users of row_cache will need to update their code. Any time
41
+ // after Phase 2, the block cache API can become more specialized in ways
42
+ // incompatible with general caches.
43
+ // * Phase 3 - Move existing RocksDB uses of Cache to BlockCache, and deprecate
44
+ // (but not yet remove) Cache as an alias for BlockCache.
45
+ using BlockCache = Cache;
46
+ using GeneralCache = Cache;
47
+
28
48
  // Classifications of block cache entries.
29
49
  //
30
50
  // Developer notes: Adding a new enum to this class requires corresponding
@@ -135,9 +155,39 @@ struct ShardedCacheOptions {
135
155
  CacheMetadataChargePolicy metadata_charge_policy =
136
156
  kDefaultCacheMetadataChargePolicy;
137
157
 
138
- // A SecondaryCache instance to use the non-volatile tier.
158
+ // A SecondaryCache instance to use the non-volatile tier. For a GeneralCache
159
+ // this option must be kept as default empty.
139
160
  std::shared_ptr<SecondaryCache> secondary_cache;
140
161
 
162
+ // See hash_seed comments below
163
+ static constexpr int32_t kQuasiRandomHashSeed = -1;
164
+ static constexpr int32_t kHostHashSeed = -2;
165
+
166
+ // EXPERT OPTION: Specifies how a hash seed should be determined for the
167
+ // cache, or specifies a specific seed (only recommended for diagnostics or
168
+ // testing).
169
+ //
170
+ // Background: it could be dangerous to have different cache instances
171
+ // access the same SST files with the same hash seed, as correlated unlucky
172
+ // hashing across hosts or restarts could cause a widespread issue, rather
173
+ // than an isolated one. For example, with smaller block caches, it is
174
+ // possible for large full Bloom filters in a set of SST files to be randomly
175
+ // clustered into one cache shard, causing mutex contention or a thrashing
176
+ // condition as there's little or no space left for other entries assigned to
177
+ // the shard. If a set of SST files is broadcast and used on many hosts, we
178
+ // should ensure all have an independent chance of balanced shards.
179
+ //
180
+ // Values >= 0 will be treated as fixed hash seeds. Values < 0 are reserved
181
+ // for methods of dynamically choosing a seed, currently:
182
+ // * kQuasiRandomHashSeed - Each cache created chooses a seed mostly randomly,
183
+ // except that within a process, no seed is repeated until all have been
184
+ // issued.
185
+ // * kHostHashSeed - The seed is determined based on hashing the host name.
186
+ // Although this is arguably slightly worse for production reliability, it
187
+ // solves the essential problem of cross-host correlation while ensuring
188
+ // repeatable behavior on a host, for diagnostic purposes.
189
+ int32_t hash_seed = kHostHashSeed;
190
+
141
191
  ShardedCacheOptions() {}
142
192
  ShardedCacheOptions(
143
193
  size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit,
@@ -151,6 +201,13 @@ struct ShardedCacheOptions {
151
201
  metadata_charge_policy(_metadata_charge_policy) {}
152
202
  };
153
203
 
204
+ // LRUCache - A cache using LRU eviction to stay at or below a set capacity.
205
+ // The cache is sharded to 2^num_shard_bits shards, by hash of the key.
206
+ // The total capacity is divided and evenly assigned to each shard, and each
207
+ // shard has its own LRU list for evictions. Each shard also has a mutex for
208
+ // exclusive access during operations; even read operations need exclusive
209
+ // access in order to update the LRU list. Mutex contention is usually low
210
+ // with enough shards.
154
211
  struct LRUCacheOptions : public ShardedCacheOptions {
155
212
  // Ratio of cache reserved for high-priority and low-priority entries,
156
213
  // respectively. (See Cache::Priority below for more information on the levels.)
@@ -158,7 +215,8 @@ struct LRUCacheOptions : public ShardedCacheOptions {
158
215
  // values cannot exceed 1.
159
216
  //
160
217
  // If high_pri_pool_ratio is greater than zero, a dedicated high-priority LRU
161
- // list is maintained by the cache. Similarly, if low_pri_pool_ratio is
218
+ // list is maintained by the cache. A ratio of 0.5 means non-high-priority
219
+ // entries will use midpoint insertion. Similarly, if low_pri_pool_ratio is
162
220
  // greater than zero, a dedicated low-priority LRU list is maintained.
163
221
  // There is also a bottom-priority LRU list, which is always enabled and not
164
222
  // explicitly configurable. Entries are spilled over to the next available
@@ -173,9 +231,6 @@ struct LRUCacheOptions : public ShardedCacheOptions {
173
231
  // otherwise, they are placed in the bottom-priority pool.) This results
174
232
  // in lower-priority entries without hits getting evicted from the cache
175
233
  // sooner.
176
- //
177
- // Default values: high_pri_pool_ratio = 0.5 (which is referred to as
178
- // "midpoint insertion"), low_pri_pool_ratio = 0
179
234
  double high_pri_pool_ratio = 0.5;
180
235
  double low_pri_pool_ratio = 0.0;
181
236
 
@@ -199,31 +254,40 @@ struct LRUCacheOptions : public ShardedCacheOptions {
199
254
  high_pri_pool_ratio(_high_pri_pool_ratio),
200
255
  low_pri_pool_ratio(_low_pri_pool_ratio),
201
256
  use_adaptive_mutex(_use_adaptive_mutex) {}
257
+
258
+ // Construct an instance of LRUCache using these options
259
+ std::shared_ptr<Cache> MakeSharedCache() const;
260
+
261
+ // Construct an instance of LRUCache for use as a general cache (e.g. for
262
+ // row_cache). Some options are not relevant to general caches.
263
+ std::shared_ptr<GeneralCache> MakeSharedGeneralCache() const;
202
264
  };
203
265
 
204
- // Create a new cache with a fixed size capacity. The cache is sharded
205
- // to 2^num_shard_bits shards, by hash of the key. The total capacity
206
- // is divided and evenly assigned to each shard. If strict_capacity_limit
207
- // is set, insert to the cache will fail when cache is full. User can also
208
- // set percentage of the cache reserves for high priority entries via
209
- // high_pri_pool_pct.
210
- // num_shard_bits = -1 means it is automatically determined: every shard
211
- // will be at least 512KB and number of shard bits will not exceed 6.
212
- extern std::shared_ptr<Cache> NewLRUCache(
266
+ // DEPRECATED wrapper function
267
+ inline std::shared_ptr<Cache> NewLRUCache(
213
268
  size_t capacity, int num_shard_bits = -1,
214
269
  bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.5,
215
270
  std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
216
271
  bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
217
272
  CacheMetadataChargePolicy metadata_charge_policy =
218
273
  kDefaultCacheMetadataChargePolicy,
219
- double low_pri_pool_ratio = 0.0);
220
-
221
- extern std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts);
274
+ double low_pri_pool_ratio = 0.0) {
275
+ return LRUCacheOptions(capacity, num_shard_bits, strict_capacity_limit,
276
+ high_pri_pool_ratio, memory_allocator,
277
+ use_adaptive_mutex, metadata_charge_policy,
278
+ low_pri_pool_ratio)
279
+ .MakeSharedCache();
280
+ }
281
+
282
+ // DEPRECATED wrapper function
283
+ inline std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts) {
284
+ return cache_opts.MakeSharedCache();
285
+ }
222
286
 
223
287
  // EXPERIMENTAL
224
- // Options structure for configuring a SecondaryCache instance based on
225
- // LRUCache. The LRUCacheOptions.secondary_cache is not used and
226
- // should not be set.
288
+ // Options structure for configuring a SecondaryCache instance with in-memory
289
+ // compression. The implementation uses LRUCache so inherits its options,
290
+ // except LRUCacheOptions.secondary_cache is not used and should not be set.
227
291
  struct CompressedSecondaryCacheOptions : LRUCacheOptions {
228
292
  // The compression method (if any) that is used to compress data.
229
293
  CompressionType compression_type = CompressionType::kLZ4Compression;
@@ -264,11 +328,16 @@ struct CompressedSecondaryCacheOptions : LRUCacheOptions {
264
328
  compress_format_version(_compress_format_version),
265
329
  enable_custom_split_merge(_enable_custom_split_merge),
266
330
  do_not_compress_roles(_do_not_compress_roles) {}
331
+
332
+ // Construct an instance of CompressedSecondaryCache using these options
333
+ std::shared_ptr<SecondaryCache> MakeSharedSecondaryCache() const;
334
+
335
+ // Avoid confusion with LRUCache
336
+ std::shared_ptr<Cache> MakeSharedCache() const = delete;
267
337
  };
268
338
 
269
- // EXPERIMENTAL
270
- // Create a new Secondary Cache that is implemented on top of LRUCache.
271
- extern std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
339
+ // DEPRECATED wrapper function
340
+ inline std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
272
341
  size_t capacity, int num_shard_bits = -1,
273
342
  bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.5,
274
343
  double low_pri_pool_ratio = 0.0,
@@ -280,10 +349,21 @@ extern std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
280
349
  uint32_t compress_format_version = 2,
281
350
  bool enable_custom_split_merge = false,
282
351
  const CacheEntryRoleSet& _do_not_compress_roles = {
283
- CacheEntryRole::kFilterBlock});
284
-
285
- extern std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
286
- const CompressedSecondaryCacheOptions& opts);
352
+ CacheEntryRole::kFilterBlock}) {
353
+ return CompressedSecondaryCacheOptions(
354
+ capacity, num_shard_bits, strict_capacity_limit,
355
+ high_pri_pool_ratio, low_pri_pool_ratio, memory_allocator,
356
+ use_adaptive_mutex, metadata_charge_policy, compression_type,
357
+ compress_format_version, enable_custom_split_merge,
358
+ _do_not_compress_roles)
359
+ .MakeSharedSecondaryCache();
360
+ }
361
+
362
+ // DEPRECATED wrapper function
363
+ inline std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
364
+ const CompressedSecondaryCacheOptions& opts) {
365
+ return opts.MakeSharedSecondaryCache();
366
+ }
287
367
 
288
368
  // HyperClockCache - A lock-free Cache alternative for RocksDB block cache
289
369
  // that offers much improved CPU efficiency vs. LRUCache under high parallel
@@ -294,7 +374,6 @@ extern std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
294
374
  // * Requires an extra tuning parameter: see estimated_entry_charge below.
295
375
  // Similarly, substantially changing the capacity with SetCapacity could
296
376
  // harm efficiency.
297
- // * SecondaryCache is not yet supported.
298
377
  // * Cache priorities are less aggressively enforced, which could cause
299
378
  // cache dilution from long range scans (unless they use fill_cache=false).
300
379
  // * Can be worse for small caches, because if almost all of a cache shard is
@@ -1763,6 +1763,25 @@ class DB {
1763
1763
  const ExportImportFilesMetaData& metadata,
1764
1764
  ColumnFamilyHandle** handle) = 0;
1765
1765
 
1766
+ // EXPERIMENTAL
1767
+ // ClipColumnFamily() will clip the entries in the CF according to the range
1768
+ // [begin_key,
1769
+ // end_key).
1770
+ // Returns OK on success, and a non-OK status on error.
1771
+ // Any entries outside this range will be completely deleted (including
1772
+ // tombstones).
1773
+ // The main difference between ClipColumnFamily(begin, end) and
1774
+ // DeleteRange(begin, end)
1775
+ // is that the former physically deletes all keys outside the range, but is
1776
+ // more heavyweight than the latter.
1777
+ // This feature is mainly used to ensure that there is no overlapping Key when
1778
+ // calling
1779
+ // CreateColumnFamilyWithImports() to import multiple CFs.
1780
+ // Note that: concurrent updates cannot be performed during Clip.
1781
+ virtual Status ClipColumnFamily(ColumnFamilyHandle* column_family,
1782
+ const Slice& begin_key,
1783
+ const Slice& end_key) = 0;
1784
+
1766
1785
  // Verify the checksums of files in db. Currently the whole-file checksums of
1767
1786
  // table files are checked.
1768
1787
  virtual Status VerifyFileChecksums(const ReadOptions& /*read_options*/) {
@@ -436,6 +436,14 @@ class Env : public Customizable {
436
436
  IO_TOTAL = 4
437
437
  };
438
438
 
439
+ // EXPERIMENTAL
440
+ enum class IOActivity : uint8_t {
441
+ kFlush = 0,
442
+ kCompaction = 1,
443
+ kDBOpen = 2,
444
+ kUnknown, // Keep last for easy array of non-unknowns
445
+ };
446
+
439
447
  // Arrange to run "(*function)(arg)" once in a background thread, in
440
448
  // the thread pool specified by pri. By default, jobs go to the 'LOW'
441
449
  // priority thread pool.
@@ -116,6 +116,8 @@ struct IOOptions {
116
116
  // directories and list only files in GetChildren API.
117
117
  bool do_not_recurse;
118
118
 
119
+ Env::IOActivity io_activity = Env::IOActivity::kUnknown;
120
+
119
121
  IOOptions() : IOOptions(false) {}
120
122
 
121
123
  explicit IOOptions(bool force_dir_fsync_)