@nxtedition/rocksdb 8.2.7 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (359) hide show
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +7 -1
  2. package/deps/rocksdb/rocksdb/Makefile +22 -19
  3. package/deps/rocksdb/rocksdb/TARGETS +8 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +157 -61
  5. package/deps/rocksdb/rocksdb/cache/cache_test.cc +43 -92
  6. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +632 -455
  7. package/deps/rocksdb/rocksdb/cache/clock_cache.h +244 -149
  8. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +41 -13
  9. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +11 -1
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +216 -17
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +7 -5
  12. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +279 -199
  13. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +2 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +159 -8
  15. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +28 -2
  16. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +1 -1
  17. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +8 -0
  18. package/deps/rocksdb/rocksdb/crash_test.mk +14 -0
  19. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +3 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -1
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +1 -1
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +1 -1
  24. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -21
  25. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +1 -2
  26. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +1 -1
  27. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +2 -3
  28. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +1 -1
  29. package/deps/rocksdb/rocksdb/db/builder.cc +32 -7
  30. package/deps/rocksdb/rocksdb/db/c.cc +169 -6
  31. package/deps/rocksdb/rocksdb/db/c_test.c +104 -6
  32. package/deps/rocksdb/rocksdb/db/column_family.cc +98 -47
  33. package/deps/rocksdb/rocksdb/db/column_family.h +25 -2
  34. package/deps/rocksdb/rocksdb/db/column_family_test.cc +213 -2
  35. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -1
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +93 -23
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +33 -9
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +7 -6
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -6
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +2 -2
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +107 -43
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -4
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -0
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +25 -17
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +13 -4
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -11
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +29 -4
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +24 -31
  50. package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +3 -1
  51. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +19 -19
  52. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +2 -1
  53. package/deps/rocksdb/rocksdb/db/convenience.cc +20 -3
  54. package/deps/rocksdb/rocksdb/db/convenience_impl.h +15 -0
  55. package/deps/rocksdb/rocksdb/db/corruption_test.cc +17 -0
  56. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +1 -0
  57. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +17 -3
  58. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +5 -0
  59. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +15 -15
  60. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +666 -44
  61. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +2 -29
  62. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +274 -1
  63. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +40 -19
  64. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +6 -5
  65. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +250 -116
  66. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +51 -23
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +354 -96
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +6 -3
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +2 -1
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -0
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +50 -21
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +26 -13
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +13 -5
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +61 -21
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -87
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +7 -1
  77. package/deps/rocksdb/rocksdb/db/db_iter.cc +2 -2
  78. package/deps/rocksdb/rocksdb/db/db_iter.h +1 -0
  79. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +4 -11
  80. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +6 -6
  81. package/deps/rocksdb/rocksdb/db/db_options_test.cc +39 -29
  82. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +26 -36
  83. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +106 -0
  84. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +12 -3
  85. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +1 -1
  86. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +1 -0
  87. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +279 -166
  88. package/deps/rocksdb/rocksdb/db/db_test.cc +48 -21
  89. package/deps/rocksdb/rocksdb/db/db_test2.cc +81 -12
  90. package/deps/rocksdb/rocksdb/db/db_test_util.cc +14 -6
  91. package/deps/rocksdb/rocksdb/db/db_test_util.h +40 -0
  92. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +13 -1
  93. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +233 -0
  94. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +143 -0
  95. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +6 -6
  96. package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -2
  97. package/deps/rocksdb/rocksdb/db/dbformat.cc +36 -0
  98. package/deps/rocksdb/rocksdb/db/dbformat.h +169 -20
  99. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +129 -0
  100. package/deps/rocksdb/rocksdb/db/error_handler.cc +16 -0
  101. package/deps/rocksdb/rocksdb/db/error_handler.h +6 -3
  102. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
  103. package/deps/rocksdb/rocksdb/db/event_helpers.cc +4 -0
  104. package/deps/rocksdb/rocksdb/db/experimental.cc +2 -1
  105. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +4 -4
  106. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +17 -8
  107. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -4
  108. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/file_indexer.cc +2 -4
  110. package/deps/rocksdb/rocksdb/db/flush_job.cc +101 -11
  111. package/deps/rocksdb/rocksdb/db/flush_job.h +24 -1
  112. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +88 -11
  113. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +2 -3
  114. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +159 -91
  115. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +19 -10
  116. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +143 -0
  117. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -1
  118. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  119. package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -1
  120. package/deps/rocksdb/rocksdb/db/log_reader.h +3 -2
  121. package/deps/rocksdb/rocksdb/db/log_test.cc +17 -21
  122. package/deps/rocksdb/rocksdb/db/log_writer.cc +1 -1
  123. package/deps/rocksdb/rocksdb/db/log_writer.h +3 -2
  124. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +4 -3
  125. package/deps/rocksdb/rocksdb/db/memtable.cc +52 -13
  126. package/deps/rocksdb/rocksdb/db/memtable.h +45 -1
  127. package/deps/rocksdb/rocksdb/db/memtable_list.cc +44 -10
  128. package/deps/rocksdb/rocksdb/db/memtable_list.h +32 -1
  129. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +90 -4
  130. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -2
  131. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +1 -0
  132. package/deps/rocksdb/rocksdb/db/repair.cc +21 -4
  133. package/deps/rocksdb/rocksdb/db/repair_test.cc +143 -2
  134. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -4
  135. package/deps/rocksdb/rocksdb/db/table_cache.cc +44 -35
  136. package/deps/rocksdb/rocksdb/db/table_cache.h +6 -6
  137. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
  138. package/deps/rocksdb/rocksdb/db/version_builder.cc +0 -1
  139. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +236 -204
  140. package/deps/rocksdb/rocksdb/db/version_edit.cc +66 -4
  141. package/deps/rocksdb/rocksdb/db/version_edit.h +48 -6
  142. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +80 -8
  143. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +12 -0
  144. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +86 -17
  145. package/deps/rocksdb/rocksdb/db/version_set.cc +136 -41
  146. package/deps/rocksdb/rocksdb/db/version_set.h +28 -7
  147. package/deps/rocksdb/rocksdb/db/version_set_test.cc +25 -15
  148. package/deps/rocksdb/rocksdb/db/write_batch.cc +11 -0
  149. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
  150. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +16 -0
  151. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +22 -3
  152. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +2 -0
  153. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +42 -0
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +32 -3
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +7 -0
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +247 -120
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +9 -4
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +13 -6
  159. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +2 -0
  160. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +15 -27
  161. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +264 -69
  162. package/deps/rocksdb/rocksdb/env/env.cc +1 -2
  163. package/deps/rocksdb/rocksdb/env/env_encryption.cc +11 -165
  164. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +0 -17
  165. package/deps/rocksdb/rocksdb/env/env_posix.cc +6 -2
  166. package/deps/rocksdb/rocksdb/env/env_test.cc +86 -2
  167. package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
  168. package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +78 -0
  169. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +34 -0
  170. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -0
  171. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +15 -4
  172. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +52 -43
  173. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +34 -18
  174. package/deps/rocksdb/rocksdb/file/file_util.cc +10 -5
  175. package/deps/rocksdb/rocksdb/file/file_util.h +13 -1
  176. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +724 -79
  177. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +64 -33
  178. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -16
  179. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +23 -12
  180. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +3 -0
  181. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +2 -1
  182. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +153 -88
  183. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +70 -2
  184. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +50 -11
  185. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +3 -0
  186. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +16 -2
  187. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +1 -1
  188. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +55 -8
  189. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +32 -4
  190. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -109
  191. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +90 -13
  192. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +3 -0
  193. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +85 -17
  194. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +13 -1
  195. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +2 -1
  196. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +5 -1
  197. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +21 -2
  198. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +7 -1
  199. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +6 -0
  200. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +5 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +33 -2
  202. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +14 -0
  203. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +33 -2
  204. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +0 -3
  205. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  206. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +3 -0
  207. package/deps/rocksdb/rocksdb/memory/arena_test.cc +18 -11
  208. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +2 -1
  209. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +69 -34
  210. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +16 -1
  211. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +10 -0
  212. package/deps/rocksdb/rocksdb/options/cf_options.cc +19 -0
  213. package/deps/rocksdb/rocksdb/options/cf_options.h +10 -2
  214. package/deps/rocksdb/rocksdb/options/customizable_test.cc +2 -1
  215. package/deps/rocksdb/rocksdb/options/db_options.cc +7 -0
  216. package/deps/rocksdb/rocksdb/options/db_options.h +1 -0
  217. package/deps/rocksdb/rocksdb/options/options.cc +15 -1
  218. package/deps/rocksdb/rocksdb/options/options_helper.cc +6 -0
  219. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +11 -3
  220. package/deps/rocksdb/rocksdb/options/options_test.cc +8 -0
  221. package/deps/rocksdb/rocksdb/port/mmap.h +20 -0
  222. package/deps/rocksdb/rocksdb/port/stack_trace.cc +27 -12
  223. package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -1
  224. package/deps/rocksdb/rocksdb/src.mk +3 -0
  225. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  226. package/deps/rocksdb/rocksdb/table/block_based/block.cc +48 -22
  227. package/deps/rocksdb/rocksdb/table/block_based/block.h +60 -12
  228. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +115 -42
  229. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -5
  230. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +60 -2
  231. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +2 -0
  232. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +62 -44
  233. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +36 -14
  234. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +38 -15
  235. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +219 -51
  236. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +41 -8
  237. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -1
  238. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +50 -21
  239. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +11 -4
  240. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +195 -55
  241. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
  242. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +31 -16
  243. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +97 -58
  244. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
  245. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +6 -0
  246. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +27 -12
  247. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +3 -1
  248. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +114 -70
  249. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +1 -2
  250. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +9 -6
  251. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +15 -3
  252. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +6 -3
  253. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +11 -11
  254. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -0
  255. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +1 -0
  256. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +6 -2
  257. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +1 -2
  258. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +2 -3
  259. package/deps/rocksdb/rocksdb/table/format.cc +175 -33
  260. package/deps/rocksdb/rocksdb/table/format.h +63 -10
  261. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +10 -2
  262. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +12 -4
  263. package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -0
  264. package/deps/rocksdb/rocksdb/table/mock_table.cc +8 -3
  265. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +10 -5
  266. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +10 -1
  267. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +1 -2
  268. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +3 -3
  269. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +12 -3
  270. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +26 -1
  271. package/deps/rocksdb/rocksdb/table/table_builder.h +6 -2
  272. package/deps/rocksdb/rocksdb/table/table_properties.cc +6 -0
  273. package/deps/rocksdb/rocksdb/table/table_test.cc +52 -22
  274. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +19 -7
  275. package/deps/rocksdb/rocksdb/test_util/sync_point.h +3 -1
  276. package/deps/rocksdb/rocksdb/test_util/testutil.cc +29 -0
  277. package/deps/rocksdb/rocksdb/test_util/testutil.h +19 -0
  278. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +65 -26
  279. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +8 -5
  280. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -0
  281. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -0
  282. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +0 -1
  283. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +4 -0
  284. package/deps/rocksdb/rocksdb/unreleased_history/README.txt +73 -0
  285. package/deps/rocksdb/rocksdb/unreleased_history/add.sh +27 -0
  286. package/deps/rocksdb/rocksdb/unreleased_history/behavior_changes/.gitkeep +0 -0
  287. package/deps/rocksdb/rocksdb/unreleased_history/bug_fixes/.gitkeep +0 -0
  288. package/deps/rocksdb/rocksdb/unreleased_history/new_features/.gitkeep +0 -0
  289. package/deps/rocksdb/rocksdb/unreleased_history/performance_improvements/.gitkeep +0 -0
  290. package/deps/rocksdb/rocksdb/unreleased_history/public_api_changes/.gitkeep +0 -0
  291. package/deps/rocksdb/rocksdb/unreleased_history/release.sh +104 -0
  292. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +5 -0
  293. package/deps/rocksdb/rocksdb/util/bloom_impl.h +3 -3
  294. package/deps/rocksdb/rocksdb/util/cast_util.h +14 -0
  295. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -0
  296. package/deps/rocksdb/rocksdb/util/comparator.cc +29 -7
  297. package/deps/rocksdb/rocksdb/util/compression.cc +4 -4
  298. package/deps/rocksdb/rocksdb/util/compression.h +110 -32
  299. package/deps/rocksdb/rocksdb/util/core_local.h +2 -1
  300. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +4 -4
  301. package/deps/rocksdb/rocksdb/util/filelock_test.cc +3 -0
  302. package/deps/rocksdb/rocksdb/util/hash.h +7 -3
  303. package/deps/rocksdb/rocksdb/util/hash_test.cc +44 -0
  304. package/deps/rocksdb/rocksdb/util/math.h +58 -6
  305. package/deps/rocksdb/rocksdb/util/math128.h +29 -7
  306. package/deps/rocksdb/rocksdb/util/mutexlock.h +35 -27
  307. package/deps/rocksdb/rocksdb/util/single_thread_executor.h +1 -0
  308. package/deps/rocksdb/rocksdb/util/stop_watch.h +1 -1
  309. package/deps/rocksdb/rocksdb/util/thread_operation.h +8 -1
  310. package/deps/rocksdb/rocksdb/util/udt_util.cc +343 -0
  311. package/deps/rocksdb/rocksdb/util/udt_util.h +173 -1
  312. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +447 -0
  313. package/deps/rocksdb/rocksdb/util/write_batch_util.cc +25 -0
  314. package/deps/rocksdb/rocksdb/util/write_batch_util.h +80 -0
  315. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -4
  316. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +69 -25
  317. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +7 -6
  318. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +1 -1
  319. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +2 -3
  320. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +6 -11
  321. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -2
  322. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +4 -5
  323. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +1 -1
  324. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +2 -2
  325. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +2 -1
  326. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +3 -3
  327. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +1 -2
  328. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +2 -3
  329. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +2 -2
  330. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
  331. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +23 -8
  332. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +9 -6
  333. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +37 -12
  334. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +231 -33
  335. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +0 -1
  336. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +76 -20
  337. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +18 -9
  338. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +40 -23
  339. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +13 -12
  340. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +7 -0
  341. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -1
  342. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +41 -11
  343. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +6 -3
  344. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +71 -24
  345. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +19 -4
  346. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +60 -107
  347. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +39 -11
  348. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +6 -3
  349. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +14 -8
  350. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h +1 -1
  351. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +10 -5
  352. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  353. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +1 -1
  354. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +2 -1
  355. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +6 -6
  356. package/deps/rocksdb/rocksdb.gyp +2 -0
  357. package/package.json +1 -1
  358. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  359. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -22,12 +22,37 @@
22
22
  #include "util/rate_limiter_impl.h"
23
23
 
24
24
  namespace ROCKSDB_NAMESPACE {
25
- const std::array<Histograms, std::size_t(Env::IOActivity::kUnknown)>
26
- kReadHistograms{{
27
- FILE_READ_FLUSH_MICROS,
28
- FILE_READ_COMPACTION_MICROS,
29
- FILE_READ_DB_OPEN_MICROS,
30
- }};
25
+ inline Histograms GetFileReadHistograms(Statistics* stats,
26
+ Env::IOActivity io_activity) {
27
+ switch (io_activity) {
28
+ case Env::IOActivity::kFlush:
29
+ return Histograms::FILE_READ_FLUSH_MICROS;
30
+ case Env::IOActivity::kCompaction:
31
+ return Histograms::FILE_READ_COMPACTION_MICROS;
32
+ case Env::IOActivity::kDBOpen:
33
+ return Histograms::FILE_READ_DB_OPEN_MICROS;
34
+ default:
35
+ break;
36
+ }
37
+
38
+ if (stats && stats->get_stats_level() > StatsLevel::kExceptDetailedTimers) {
39
+ switch (io_activity) {
40
+ case Env::IOActivity::kGet:
41
+ return Histograms::FILE_READ_GET_MICROS;
42
+ case Env::IOActivity::kMultiGet:
43
+ return Histograms::FILE_READ_MULTIGET_MICROS;
44
+ case Env::IOActivity::kDBIterator:
45
+ return Histograms::FILE_READ_DB_ITERATOR_MICROS;
46
+ case Env::IOActivity::kVerifyDBChecksum:
47
+ return Histograms::FILE_READ_VERIFY_DB_CHECKSUM_MICROS;
48
+ case Env::IOActivity::kVerifyFileChecksums:
49
+ return Histograms::FILE_READ_VERIFY_FILE_CHECKSUMS_MICROS;
50
+ default:
51
+ break;
52
+ }
53
+ }
54
+ return Histograms::HISTOGRAM_ENUM_MAX;
55
+ }
31
56
  inline void RecordIOStats(Statistics* stats, Temperature file_temperature,
32
57
  bool is_last_level, size_t size) {
33
58
  IOSTATS_ADD(bytes_read, size);
@@ -79,11 +104,11 @@ IOStatus RandomAccessFileReader::Create(
79
104
  return io_s;
80
105
  }
81
106
 
82
- IOStatus RandomAccessFileReader::Read(
83
- const IOOptions& opts, uint64_t offset, size_t n, Slice* result,
84
- char* scratch, AlignedBuf* aligned_buf,
85
- Env::IOPriority rate_limiter_priority) const {
107
+ IOStatus RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset,
108
+ size_t n, Slice* result, char* scratch,
109
+ AlignedBuf* aligned_buf) const {
86
110
  (void)aligned_buf;
111
+ const Env::IOPriority rate_limiter_priority = opts.rate_limiter_priority;
87
112
 
88
113
  TEST_SYNC_POINT_CALLBACK("RandomAccessFileReader::Read", nullptr);
89
114
 
@@ -97,17 +122,23 @@ IOStatus RandomAccessFileReader::Read(
97
122
 
98
123
  IOStatus io_s;
99
124
  uint64_t elapsed = 0;
125
+ size_t alignment = file_->GetRequiredBufferAlignment();
126
+ bool is_aligned = false;
127
+ if (scratch != nullptr) {
128
+ // Check if offset, length and buffer are aligned.
129
+ is_aligned = (offset & (alignment - 1)) == 0 &&
130
+ (n & (alignment - 1)) == 0 &&
131
+ (uintptr_t(scratch) & (alignment - 1)) == 0;
132
+ }
133
+
100
134
  {
101
135
  StopWatch sw(clock_, stats_, hist_type_,
102
- (opts.io_activity != Env::IOActivity::kUnknown)
103
- ? kReadHistograms[(std::size_t)(opts.io_activity)]
104
- : Histograms::HISTOGRAM_ENUM_MAX,
136
+ GetFileReadHistograms(stats_, opts.io_activity),
105
137
  (stats_ != nullptr) ? &elapsed : nullptr, true /*overwrite*/,
106
138
  true /*delay_enabled*/);
107
139
  auto prev_perf_level = GetPerfLevel();
108
140
  IOSTATS_TIMER_GUARD(read_nanos);
109
- if (use_direct_io()) {
110
- size_t alignment = file_->GetRequiredBufferAlignment();
141
+ if (use_direct_io() && is_aligned == false) {
111
142
  size_t aligned_offset =
112
143
  TruncateToPageBoundary(alignment, static_cast<size_t>(offset));
113
144
  size_t offset_advance = static_cast<size_t>(offset) - aligned_offset;
@@ -182,9 +213,9 @@ IOStatus RandomAccessFileReader::Read(
182
213
  if (rate_limiter_->IsRateLimited(RateLimiter::OpType::kRead)) {
183
214
  sw.DelayStart();
184
215
  }
185
- allowed = rate_limiter_->RequestToken(n - pos, 0 /* alignment */,
186
- rate_limiter_priority, stats_,
187
- RateLimiter::OpType::kRead);
216
+ allowed = rate_limiter_->RequestToken(
217
+ n - pos, (use_direct_io() ? alignment : 0), rate_limiter_priority,
218
+ stats_, RateLimiter::OpType::kRead);
188
219
  if (rate_limiter_->IsRateLimited(RateLimiter::OpType::kRead)) {
189
220
  sw.DelayStop();
190
221
  }
@@ -269,9 +300,10 @@ bool TryMerge(FSReadRequest* dest, const FSReadRequest& src) {
269
300
  return true;
270
301
  }
271
302
 
272
- IOStatus RandomAccessFileReader::MultiRead(
273
- const IOOptions& opts, FSReadRequest* read_reqs, size_t num_reqs,
274
- AlignedBuf* aligned_buf, Env::IOPriority rate_limiter_priority) const {
303
+ IOStatus RandomAccessFileReader::MultiRead(const IOOptions& opts,
304
+ FSReadRequest* read_reqs,
305
+ size_t num_reqs,
306
+ AlignedBuf* aligned_buf) const {
275
307
  (void)aligned_buf; // suppress warning of unused variable in LITE mode
276
308
  assert(num_reqs > 0);
277
309
 
@@ -280,6 +312,7 @@ IOStatus RandomAccessFileReader::MultiRead(
280
312
  assert(read_reqs[i].offset <= read_reqs[i + 1].offset);
281
313
  }
282
314
  #endif // !NDEBUG
315
+ const Env::IOPriority rate_limiter_priority = opts.rate_limiter_priority;
283
316
 
284
317
  // To be paranoid modify scratch a little bit, so in case underlying
285
318
  // FileSystem doesn't fill the buffer but return success and `scratch` returns
@@ -296,9 +329,7 @@ IOStatus RandomAccessFileReader::MultiRead(
296
329
  uint64_t elapsed = 0;
297
330
  {
298
331
  StopWatch sw(clock_, stats_, hist_type_,
299
- (opts.io_activity != Env::IOActivity::kUnknown)
300
- ? kReadHistograms[(std::size_t)(opts.io_activity)]
301
- : Histograms::HISTOGRAM_ENUM_MAX,
332
+ GetFileReadHistograms(stats_, opts.io_activity),
302
333
  (stats_ != nullptr) ? &elapsed : nullptr, true /*overwrite*/,
303
334
  true /*delay_enabled*/);
304
335
  auto prev_perf_level = GetPerfLevel();
@@ -314,14 +345,14 @@ IOStatus RandomAccessFileReader::MultiRead(
314
345
  // Align and merge the read requests.
315
346
  size_t alignment = file_->GetRequiredBufferAlignment();
316
347
  for (size_t i = 0; i < num_reqs; i++) {
317
- const auto& r = Align(read_reqs[i], alignment);
348
+ FSReadRequest r = Align(read_reqs[i], alignment);
318
349
  if (i == 0) {
319
350
  // head
320
- aligned_reqs.push_back(r);
351
+ aligned_reqs.push_back(std::move(r));
321
352
 
322
353
  } else if (!TryMerge(&aligned_reqs.back(), r)) {
323
354
  // head + n
324
- aligned_reqs.push_back(r);
355
+ aligned_reqs.push_back(std::move(r));
325
356
 
326
357
  } else {
327
358
  // unused
@@ -487,16 +518,16 @@ IOStatus RandomAccessFileReader::ReadAsync(
487
518
 
488
519
  assert(read_async_info->buf_.CurrentSize() == 0);
489
520
 
490
- StopWatch sw(clock_, nullptr /*stats*/,
491
- Histograms::HISTOGRAM_ENUM_MAX /*hist_type*/,
492
- Histograms::HISTOGRAM_ENUM_MAX, &elapsed, true /*overwrite*/,
521
+ StopWatch sw(clock_, stats_, hist_type_,
522
+ GetFileReadHistograms(stats_, opts.io_activity),
523
+ (stats_ != nullptr) ? &elapsed : nullptr, true /*overwrite*/,
493
524
  true /*delay_enabled*/);
494
525
  s = file_->ReadAsync(aligned_req, opts, read_async_callback,
495
526
  read_async_info, io_handle, del_fn, nullptr /*dbg*/);
496
527
  } else {
497
- StopWatch sw(clock_, nullptr /*stats*/,
498
- Histograms::HISTOGRAM_ENUM_MAX /*hist_type*/,
499
- Histograms::HISTOGRAM_ENUM_MAX, &elapsed, true /*overwrite*/,
528
+ StopWatch sw(clock_, stats_, hist_type_,
529
+ GetFileReadHistograms(stats_, opts.io_activity),
530
+ (stats_ != nullptr) ? &elapsed : nullptr, true /*overwrite*/,
500
531
  true /*delay_enabled*/);
501
532
  s = file_->ReadAsync(req, opts, read_async_callback, read_async_info,
502
533
  io_handle, del_fn, nullptr /*dbg*/);
@@ -164,31 +164,18 @@ class RandomAccessFileReader {
164
164
  // 2. Otherwise, scratch is not used and can be null, the aligned_buf owns
165
165
  // the internally allocated buffer on return, and the result refers to a
166
166
  // region in aligned_buf.
167
- //
168
- // `rate_limiter_priority` is used to charge the internal rate limiter when
169
- // enabled. The special value `Env::IO_TOTAL` makes this operation bypass the
170
- // rate limiter.
171
167
  IOStatus Read(const IOOptions& opts, uint64_t offset, size_t n, Slice* result,
172
- char* scratch, AlignedBuf* aligned_buf,
173
- Env::IOPriority rate_limiter_priority) const;
168
+ char* scratch, AlignedBuf* aligned_buf) const;
174
169
 
175
170
  // REQUIRES:
176
171
  // num_reqs > 0, reqs do not overlap, and offsets in reqs are increasing.
177
172
  // In non-direct IO mode, aligned_buf should be null;
178
173
  // In direct IO mode, aligned_buf stores the aligned buffer allocated inside
179
174
  // MultiRead, the result Slices in reqs refer to aligned_buf.
180
- //
181
- // `rate_limiter_priority` will be used to charge the internal rate limiter.
182
- // It is not yet supported so the client must provide the special value
183
- // `Env::IO_TOTAL` to bypass the rate limiter.
184
175
  IOStatus MultiRead(const IOOptions& opts, FSReadRequest* reqs,
185
- size_t num_reqs, AlignedBuf* aligned_buf,
186
- Env::IOPriority rate_limiter_priority) const;
176
+ size_t num_reqs, AlignedBuf* aligned_buf) const;
187
177
 
188
- IOStatus Prefetch(uint64_t offset, size_t n,
189
- const Env::IOPriority rate_limiter_priority) const {
190
- IOOptions opts;
191
- opts.rate_limiter_priority = rate_limiter_priority;
178
+ IOStatus Prefetch(const IOOptions& opts, uint64_t offset, size_t n) const {
192
179
  return file_->Prefetch(offset, n, opts, nullptr);
193
180
  }
194
181
 
@@ -83,8 +83,9 @@ TEST_F(RandomAccessFileReaderTest, ReadDirectIO) {
83
83
  Slice result;
84
84
  AlignedBuf buf;
85
85
  for (Env::IOPriority rate_limiter_priority : {Env::IO_LOW, Env::IO_TOTAL}) {
86
- ASSERT_OK(r->Read(IOOptions(), offset, len, &result, nullptr, &buf,
87
- rate_limiter_priority));
86
+ IOOptions io_opts;
87
+ io_opts.rate_limiter_priority = rate_limiter_priority;
88
+ ASSERT_OK(r->Read(io_opts, offset, len, &result, nullptr, &buf));
88
89
  ASSERT_EQ(result.ToString(), content.substr(offset, len));
89
90
  }
90
91
  }
@@ -95,7 +96,18 @@ TEST_F(RandomAccessFileReaderTest, MultiReadDirectIO) {
95
96
  "RandomAccessFileReader::MultiRead:AlignedReqs", [&](void* reqs) {
96
97
  // Copy reqs, since it's allocated on stack inside MultiRead, which will
97
98
  // be deallocated after MultiRead returns.
98
- aligned_reqs = *reinterpret_cast<std::vector<FSReadRequest>*>(reqs);
99
+ size_t i = 0;
100
+ aligned_reqs.resize(
101
+ (*reinterpret_cast<std::vector<FSReadRequest>*>(reqs)).size());
102
+ for (auto& req :
103
+ (*reinterpret_cast<std::vector<FSReadRequest>*>(reqs))) {
104
+ aligned_reqs[i].offset = req.offset;
105
+ aligned_reqs[i].len = req.len;
106
+ aligned_reqs[i].result = req.result;
107
+ aligned_reqs[i].status = req.status;
108
+ aligned_reqs[i].scratch = req.scratch;
109
+ i++;
110
+ }
99
111
  });
100
112
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
101
113
 
@@ -135,8 +147,8 @@ TEST_F(RandomAccessFileReaderTest, MultiReadDirectIO) {
135
147
  reqs.push_back(std::move(r0));
136
148
  reqs.push_back(std::move(r1));
137
149
  AlignedBuf aligned_buf;
138
- ASSERT_OK(r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf,
139
- Env::IO_TOTAL /* rate_limiter_priority */));
150
+ ASSERT_OK(
151
+ r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf));
140
152
 
141
153
  AssertResult(content, reqs);
142
154
 
@@ -180,8 +192,8 @@ TEST_F(RandomAccessFileReaderTest, MultiReadDirectIO) {
180
192
  reqs.push_back(std::move(r1));
181
193
  reqs.push_back(std::move(r2));
182
194
  AlignedBuf aligned_buf;
183
- ASSERT_OK(r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf,
184
- Env::IO_TOTAL /* rate_limiter_priority */));
195
+ ASSERT_OK(
196
+ r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf));
185
197
 
186
198
  AssertResult(content, reqs);
187
199
 
@@ -225,8 +237,8 @@ TEST_F(RandomAccessFileReaderTest, MultiReadDirectIO) {
225
237
  reqs.push_back(std::move(r1));
226
238
  reqs.push_back(std::move(r2));
227
239
  AlignedBuf aligned_buf;
228
- ASSERT_OK(r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf,
229
- Env::IO_TOTAL /* rate_limiter_priority */));
240
+ ASSERT_OK(
241
+ r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf));
230
242
 
231
243
  AssertResult(content, reqs);
232
244
 
@@ -262,8 +274,8 @@ TEST_F(RandomAccessFileReaderTest, MultiReadDirectIO) {
262
274
  reqs.push_back(std::move(r0));
263
275
  reqs.push_back(std::move(r1));
264
276
  AlignedBuf aligned_buf;
265
- ASSERT_OK(r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf,
266
- Env::IO_TOTAL /* rate_limiter_priority */));
277
+ ASSERT_OK(
278
+ r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf));
267
279
 
268
280
  AssertResult(content, reqs);
269
281
 
@@ -283,7 +295,6 @@ TEST_F(RandomAccessFileReaderTest, MultiReadDirectIO) {
283
295
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
284
296
  }
285
297
 
286
-
287
298
  TEST(FSReadRequest, Align) {
288
299
  FSReadRequest r;
289
300
  r.offset = 2000;
@@ -99,6 +99,9 @@ class SequentialFileReader {
99
99
  // when less than n bytes are actually read (e.g. at end of file). To avoid
100
100
  // overcharging the rate limiter, the caller can use file size to cap n to
101
101
  // read until end of file.
102
+ //
103
+ // TODO(hx235): accept parameter `IOOptions` containing
104
+ // `rate_limiter_priority` like RandomAccessFileReader::Read()
102
105
  IOStatus Read(size_t n, Slice* result, char* scratch,
103
106
  Env::IOPriority rate_limiter_priority);
104
107
 
@@ -514,7 +514,8 @@ class Cache {
514
514
  // returns `true` if it has taken ownership of the Value (object), or
515
515
  // `false` if the cache should destroy it as usual. Regardless, Ref() and
516
516
  // Release() cannot be called on this Handle that is poised for eviction.
517
- using EvictionCallback = std::function<bool(const Slice& key, Handle* h)>;
517
+ using EvictionCallback =
518
+ std::function<bool(const Slice& key, Handle* h, bool was_hit)>;
518
519
  // Sets an eviction callback for this Cache. Not thread safe and only
519
520
  // supports being set once, so should only be used during initialization
520
521
  // or destruction, guaranteed before or after any thread-shared operations.
@@ -181,6 +181,14 @@ struct CompressionOptions {
181
181
  // compressed by less than 12.5% (minimum ratio of 1.143:1).
182
182
  int max_compressed_bytes_per_kb = 1024 * 7 / 8;
183
183
 
184
+ // ZSTD only.
185
+ // Enable compression algorithm's checksum feature.
186
+ // (https://github.com/facebook/zstd/blob/d857369028d997c92ff1f1861a4d7f679a125464/lib/zstd.h#L428)
187
+ // Each compressed frame will have a 32-bit checksum attached. The checksum
188
+ // is computed from the uncompressed data and can be verified during
189
+ // decompression.
190
+ bool checksum = false;
191
+
184
192
  // A convenience function for setting max_compressed_bytes_per_kb based on a
185
193
  // minimum acceptable compression ratio (uncompressed size over compressed
186
194
  // size).
@@ -600,11 +608,11 @@ struct AdvancedColumnFamilyOptions {
600
608
  // 1. target size is in the range of
601
609
  // (max_bytes_for_level_base / max_bytes_for_level_multiplier,
602
610
  // max_bytes_for_level_base]
603
- // 2. target size of the last level (level num_levels-1) equals to extra size
604
- // of the level.
605
- // At the same time max_bytes_for_level_multiplier and
606
- // max_bytes_for_level_multiplier_additional are still satisfied.
607
- // (When L0 is too large, we make some adjustment. See below.)
611
+ // 2. target size of the last level (level num_levels-1) equals the max
612
+ // size of a level in the LSM (typically the last level).
613
+ // At the same time max_bytes_for_level_multiplier is still satisfied.
614
+ // Note that max_bytes_for_level_multiplier_additional is ignored with this
615
+ // flag on.
608
616
  //
609
617
  // With this option on, from an empty DB, we make last level the base level,
610
618
  // which means merging L0 data into the last level, until it exceeds
@@ -642,60 +650,37 @@ struct AdvancedColumnFamilyOptions {
642
650
  // By doing it, we give max_bytes_for_level_multiplier a priority against
643
651
  // max_bytes_for_level_base, for a more predictable LSM tree shape. It is
644
652
  // useful to limit worst-case space amplification.
645
- //
646
- //
647
- // If the compaction from L0 is lagged behind, a special mode will be turned
648
- // on to prioritize write amplification against max_bytes_for_level_multiplier
649
- // or max_bytes_for_level_base. The L0 compaction is lagged behind by looking
650
- // at number of L0 files and total L0 size. If number of L0 files is at least
651
- // the double of level0_file_num_compaction_trigger, or the total size is
652
- // at least max_bytes_for_level_base, this mode is on. The target of L1 grows
653
- // to the actual data size in L0, and then determine the target for each level
654
- // so that each level will have the same level multiplier.
655
- //
656
- // For example, when L0 size is 100MB, the size of last level is 1600MB,
657
- // max_bytes_for_level_base = 80MB, and max_bytes_for_level_multiplier = 10.
658
- // Since L0 size is larger than max_bytes_for_level_base, this is a L0
659
- // compaction backlogged mode. So that the L1 size is determined to be 100MB.
660
- // Based on max_bytes_for_level_multiplier = 10, at least 3 non-0 levels will
661
- // be needed. The level multiplier will be calculated to be 4 and the three
662
- // levels' target to be [100MB, 400MB, 1600MB].
663
- //
664
- // In this mode, The number of levels will be no more than the normal mode,
665
- // and the level multiplier will be lower. The write amplification will
666
- // likely to be reduced.
667
- //
668
- //
669
- // max_bytes_for_level_multiplier_additional is ignored with this flag on.
670
- //
671
- // To make the migration easier, when turning this feature on, files in the
672
- // LSM will be trivially moved down to fill the LSM starting from the
673
- // bottommost level during DB open. For example, if the LSM looks like:
674
- // L0: f0, f1
675
- // L1: f2, f3
676
- // L2: f4
677
- // L3:
678
- // L4: f5
679
- // and the DB is opened with num_levels = 7 with this feature turned on,
680
- // new LSM after DB open looks like the following:
681
- // L0: f0, f1, (and possibly data flushed from WAL)
682
- // L4: f2, f3
683
- // L5: f4
684
- // L6: f5
685
- //
686
653
  // If `allow_ingest_behind=true` or `preclude_last_level_data_seconds > 0`,
687
654
  // then the last level is reserved, and we will start filling LSM from the
688
- // second last level (L5 in the above example).
655
+ // second last level.
656
+ //
657
+ // With this option on, compaction is more adaptive to write traffic:
658
+ // Compaction priority will take into account estimated bytes to be compacted
659
+ // down to a level and favors compacting lower levels when there is a write
660
+ // traffic spike (and hence more compaction debt). Refer to
661
+ // https://github.com/facebook/rocksdb/wiki/Leveled-Compaction#option-level_compaction_dynamic_level_bytes-and-levels-target-size
662
+ // for more detailed description. See more implementation detail in:
663
+ // VersionStorageInfo::ComputeCompactionScore().
689
664
  //
665
+ // With this option on, unneeded levels will be drained automatically:
690
666
  // Note that there may be excessive levels (where target level size is 0 when
691
- // computed based on this feature) in the LSM after a user migrates to turn
692
- // this feature on. This is especially likely when a user migrates from
693
- // leveled compaction with a smaller multiplier or from universal compaction.
694
- // RocksDB will gradually drain these unnecessary levels by compacting files
695
- // down the LSM.
667
+ // computed based on this feature) in the LSM. This can happen after a user
668
+ // migrates to turn this feature on or deletes a lot of data. This is
669
+ // especially likely when a user migrates from leveled compaction with a
670
+ // smaller multiplier or from universal compaction. RocksDB will gradually
671
+ // drain these unnecessary levels by compacting files down the LSM. Smaller
672
+ // number of levels should help to reduce read amplification.
673
+ //
674
+ // Migration to turn on this option:
675
+ // - Before RocksDB v8.2, users are expected to do a full manual compaction
676
+ // and then restart DB to turn on this option.
677
+ // - Since RocksDB v8.2, users can just restart DB with this option on, as
678
+ // long as num_levels is no smaller than number of non-empty levels in the
679
+ // LSM. Migration will be done automatically by RocksDB. See more in
680
+ // https://github.com/facebook/rocksdb/wiki/Leveled-Compaction#migrating-from-level_compaction_dynamic_level_bytesfalse-to-level_compaction_dynamic_level_bytestrue
696
681
  //
697
- // Default: false
698
- bool level_compaction_dynamic_level_bytes = false;
682
+ // Default: true
683
+ bool level_compaction_dynamic_level_bytes = true;
699
684
 
700
685
  // Allows RocksDB to generate files that are not exactly the target_file_size
701
686
  // only for the non-bottommost files. Which can reduce the write-amplification
@@ -714,6 +699,8 @@ struct AdvancedColumnFamilyOptions {
714
699
  // Different max-size multipliers for different levels.
715
700
  // These are multiplied by max_bytes_for_level_multiplier to arrive
716
701
  // at the max-size of each level.
702
+ // This option only applies to leveled compaction with
703
+ // `level_compaction_dynamic_level_bytes = false`.
717
704
  //
718
705
  // Default: 1
719
706
  //
@@ -879,30 +866,60 @@ struct AdvancedColumnFamilyOptions {
879
866
  // Dynamically changeable through SetOptions() API
880
867
  bool report_bg_io_stats = false;
881
868
 
882
- // Files containing updates older than TTL will go through the compaction
883
- // process. This usually happens in a cascading way so that those entries
884
- // will be compacted to bottommost level/file.
885
- // The feature is used to remove stale entries that have been deleted or
886
- // updated from the file system.
887
- // Pre-req: This needs max_open_files to be set to -1.
888
- // In Level: Non-bottom-level files older than TTL will go through the
889
- // compaction process.
890
- // In FIFO: Files older than TTL will be deleted.
869
+ // This option has different meanings for different compaction styles:
870
+ //
871
+ // Leveled: Non-bottom-level files with all keys older than TTL will go
872
+ // through the compaction process. This usually happens in a cascading
873
+ // way so that those entries will be compacted to bottommost level/file.
874
+ // The feature is used to remove stale entries that have been deleted or
875
+ // updated from the file system.
876
+ //
877
+ // FIFO: Files with all keys older than TTL will be deleted. TTL is only
878
+ // supported if option max_open_files is set to -1.
879
+ //
880
+ // Universal: users should only set the option `periodic_compaction_seconds`
881
+ // below instead. For backward compatibility, this option has the same
882
+ // meaning as `periodic_compaction_seconds`. See more in comments for
883
+ // `periodic_compaction_seconds` on the interaction between these two
884
+ // options.
885
+ //
886
+ // This option only supports block based table format for any compaction
887
+ // style.
888
+ //
891
889
  // unit: seconds. Ex: 1 day = 1 * 24 * 60 * 60
892
- // In FIFO, this option will have the same meaning as
893
- // periodic_compaction_seconds. Whichever stricter will be used.
894
890
  // 0 means disabling.
895
891
  // UINT64_MAX - 1 (0xfffffffffffffffe) is special flag to allow RocksDB to
896
892
  // pick default.
897
893
  //
898
- // Default: 30 days for leveled compaction + block based table. disable
899
- // otherwise.
894
+ // Default: 30 days if using block based table. 0 (disable) otherwise.
900
895
  //
901
896
  // Dynamically changeable through SetOptions() API
897
+ // Note that dynamically changing this option only works for leveled and FIFO
898
+ // compaction. For universal compaction, dynamically changing this option has
899
+ // no effect, users should dynamically change `periodic_compaction_seconds`
900
+ // instead.
902
901
  uint64_t ttl = 0xfffffffffffffffe;
903
902
 
904
- // Files older than this value will be picked up for compaction, and
905
- // re-written to the same level as they were before.
903
+ // This option has different meanings for different compaction styles:
904
+ //
905
+ // Leveled: files older than `periodic_compaction_seconds` will be picked up
906
+ // for compaction and will be re-written to the same level as they were
907
+ // before.
908
+ //
909
+ // FIFO: not supported. Setting this option has no effect for FIFO compaction.
910
+ //
911
+ // Universal: when there are files older than `periodic_compaction_seconds`,
912
+ // rocksdb will try to do as large a compaction as possible including the
913
+ // last level. Such compaction is only skipped if only last level is to
914
+ // be compacted and no file in last level is older than
915
+ // `periodic_compaction_seconds`. See more in
916
+ // UniversalCompactionBuilder::PickPeriodicCompaction().
917
+ // For backward compatibility, the effective value of this option takes
918
+ // into account the value of option `ttl`. The logic is as follows:
919
+ // - both options are set to 30 days if they have the default value.
920
+ // - if both options are zero, zero is picked. Otherwise, we take the min
921
+ // value among non-zero options values (i.e. takes the stricter limit).
922
+ //
906
923
  // One main use of the feature is to make sure a file goes through compaction
907
924
  // filters periodically. Users can also use the feature to clear up SST
908
925
  // files using old format.
@@ -912,25 +929,19 @@ struct AdvancedColumnFamilyOptions {
912
929
  // age is based on the file's last modified time (given by the underlying
913
930
  // Env).
914
931
  //
915
- // Supported in all compaction styles.
916
- // In Universal compaction, rocksdb will try to do a full compaction when
917
- // possible, see more in UniversalCompactionBuilder::PickPeriodicCompaction().
918
- // In FIFO compaction, this option has the same meaning as TTL and whichever
919
- // stricter will be used.
920
- // Pre-req: max_open_file == -1.
932
+ // This option only supports block based table format for any compaction
933
+ // style.
934
+ //
921
935
  // unit: seconds. Ex: 7 days = 7 * 24 * 60 * 60
922
936
  //
923
937
  // Values:
924
938
  // 0: Turn off Periodic compactions.
925
- // UINT64_MAX - 1 (i.e 0xfffffffffffffffe): Let RocksDB control this feature
926
- // as needed. For now, RocksDB will change this value to 30 days
927
- // (i.e 30 * 24 * 60 * 60) so that every file goes through the compaction
928
- // process at least once every 30 days if not compacted sooner.
929
- // In FIFO compaction, since the option has the same meaning as ttl,
930
- // when this value is left default, and ttl is left to 0, 30 days will be
931
- // used. Otherwise, min(ttl, periodic_compaction_seconds) will be used.
939
+ // UINT64_MAX - 1 (0xfffffffffffffffe) is special flag to allow RocksDB to
940
+ // pick default.
932
941
  //
933
- // Default: UINT64_MAX - 1 (allow RocksDB to auto-tune)
942
+ // Default: 30 days if using block based table format + compaction filter +
943
+ // leveled compaction or block based table format + universal compaction.
944
+ // 0 (disabled) otherwise.
934
945
  //
935
946
  // Dynamically changeable through SetOptions() API
936
947
  uint64_t periodic_compaction_seconds = 0xfffffffffffffffe;
@@ -957,6 +968,14 @@ struct AdvancedColumnFamilyOptions {
957
968
  Temperature bottommost_temperature = Temperature::kUnknown;
958
969
  Temperature last_level_temperature = Temperature::kUnknown;
959
970
 
971
+ // EXPERIMENTAL
972
+ // When this field is set, all SST files without an explicitly set temperature
973
+ // will be treated as if they have this temperature for file reading
974
+ // accounting purpose, such as io statistics, io perf context.
975
+ //
976
+ // Not dynamically changeable; changing it requires a DB restart.
977
+ Temperature default_temperature = Temperature::kUnknown;
978
+
960
979
  // EXPERIMENTAL
961
980
  // The feature is still in development and is incomplete.
962
981
  // If this option is set, when data insert time is within this time range, it
@@ -1133,6 +1152,7 @@ struct AdvancedColumnFamilyOptions {
1133
1152
  //
1134
1153
  // Default: 0 (no protection)
1135
1154
  // Supported values: 0, 1, 2, 4, 8.
1155
+ // Dynamically changeable through the SetOptions() API.
1136
1156
  uint32_t memtable_protection_bytes_per_key = 0;
1137
1157
 
1138
1158
  // UNDER CONSTRUCTION -- DO NOT USE
@@ -1141,11 +1161,43 @@ struct AdvancedColumnFamilyOptions {
1141
1161
  //
1142
1162
  // When it's false, the user-defined timestamps will be removed from the user
1143
1163
  // keys when data is flushed from memtables to SST files. Other places that
1144
- // user keys can be persisted like WAL and blob files go through a similar
1145
- // process. Users should call `DB::IncreaseFullHistoryTsLow` to set a cutoff
1146
- // timestamp. RocksDB refrains from flushing a memtable with data still above
1147
- // the cutoff timestamp with best effort. When users try to read below the
1148
- // cutoff timestamp, an error will be returned.
1164
+ // user keys can be persisted like file boundaries in file metadata and blob
1165
+ // files go through a similar process. There are two major motivations
1166
+ // for this flag:
1167
+ // 1) backward compatibility: if the user later decides to
1168
+ // disable the user-defined timestamp feature for the column family, these SST
1169
+ // files can be handled by a user comparator that is not aware of user-defined
1170
+ // timestamps.
1171
+ // 2) enable user-defined timestamp feature for an existing column family
1172
+ // while setting this flag to `false`: user keys in the newly generated SST
1173
+ // files are of the same format as the existing SST files.
1174
+ //
1175
+ // Currently only user comparators that format user-defined timestamps as
1176
+ // uint64_t, by using one of the RocksDB-provided comparators
1177
+ // `ComparatorWithU64TsImpl`, are supported.
1178
+ //
1179
+ // When setting this flag to `false`, users should also call
1180
+ // `DB::IncreaseFullHistoryTsLow` to set a cutoff timestamp for flush. RocksDB
1181
+ // refrains from flushing a memtable with data still above
1182
+ // the cutoff timestamp with best effort. If this cutoff timestamp is not set,
1183
+ // flushing continues normally.
1184
+ // NOTE: in order for the cutoff timestamp to work properly, users of this
1185
+ // feature need to ensure to write to a column family with globally
1186
+ // non-decreasing user-defined timestamps.
1187
+ //
1188
+ // Users can do user-defined
1189
+ // multi-versioned read above the cutoff timestamp. When users try to read
1190
+ // below the cutoff timestamp, an error will be returned.
1191
+ //
1192
+ // Note that if WAL is enabled, unlike SST files, user-defined timestamps are
1193
+ // persisted to WAL even if this flag is set to `false`. The benefit of this
1194
+ // is that user-defined timestamps can be recovered with the caveat that users
1195
+ // should flush all memtables so there are no active WAL files before doing a
1196
+ // downgrade.
1197
+ //
1198
+ // Note that setting this flag to false is not supported in combination with
1199
+ // atomic flush, or concurrent memtable write enabled by
1200
+ // `allow_concurrent_memtable_write`.
1149
1201
  //
1150
1202
  // Default: true (user-defined timestamps are persisted)
1151
1203
  // Not dynamically changeable, change it requires db restart and
@@ -1164,8 +1216,21 @@ struct AdvancedColumnFamilyOptions {
1164
1216
  //
1165
1217
  // Default: 0 (no protection)
1166
1218
  // Supported values: 0, 1, 2, 4, 8.
1219
+ // Dynamically changeable through the SetOptions() API.
1167
1220
  uint8_t block_protection_bytes_per_key = 0;
1168
1221
 
1222
+ // For leveled compaction, RocksDB may compact a file at the bottommost level
1223
+ // if it can compact away data that were protected by some snapshot.
1224
+ // The compaction reason in LOG for this kind of compactions is
1225
+ // "BottommostFiles". Usually such compaction can happen as soon as a
1226
+ // relevant snapshot is released. This option allows user to delay
1227
+ // such compactions. A file is qualified for "BottommostFiles" compaction
1228
+ // if it is at least "bottommost_file_compaction_delay" seconds old.
1229
+ //
1230
+ // Default: 0 (no delay)
1231
+ // Dynamically changeable through the SetOptions() API.
1232
+ uint32_t bottommost_file_compaction_delay = 0;
1233
+
1169
1234
  // Create ColumnFamilyOptions with default values for all fields
1170
1235
  AdvancedColumnFamilyOptions();
1171
1236
  // Create ColumnFamilyOptions from Options