@nxtedition/rocksdb 8.2.0 → 8.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (321)
  1. package/binding.cc +3 -3
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +16 -52
  3. package/deps/rocksdb/rocksdb/Makefile +10 -5
  4. package/deps/rocksdb/rocksdb/TARGETS +8 -345
  5. package/deps/rocksdb/rocksdb/cache/cache_test.cc +92 -0
  6. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +32 -32
  7. package/deps/rocksdb/rocksdb/cache/clock_cache.h +12 -9
  8. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +6 -43
  9. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +3 -13
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +8 -5
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +21 -47
  12. package/deps/rocksdb/rocksdb/cache/lru_cache.h +3 -8
  13. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +2 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +1 -2
  15. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +44 -7
  16. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +13 -14
  17. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +1 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -0
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +2 -1
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +17 -8
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +40 -21
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
  24. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +41 -42
  25. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +1 -1
  26. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +1 -1
  27. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +5 -4
  28. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +2 -2
  29. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +5 -3
  30. package/deps/rocksdb/rocksdb/db/builder.cc +7 -6
  31. package/deps/rocksdb/rocksdb/db/builder.h +2 -2
  32. package/deps/rocksdb/rocksdb/db/c.cc +76 -5
  33. package/deps/rocksdb/rocksdb/db/c_test.c +141 -0
  34. package/deps/rocksdb/rocksdb/db/column_family.cc +32 -0
  35. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +3 -2
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +5 -0
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -5
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +12 -10
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +21 -17
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -2
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +8 -7
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +3 -1
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +1 -1
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +77 -50
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +4 -5
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +55 -8
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +142 -56
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +1 -1
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +1 -2
  50. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +21 -20
  51. package/deps/rocksdb/rocksdb/db/convenience.cc +8 -6
  52. package/deps/rocksdb/rocksdb/db/corruption_test.cc +5 -4
  53. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +6 -3
  54. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +260 -220
  55. package/deps/rocksdb/rocksdb/db/db_clip_test.cc +142 -0
  56. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +1 -1
  57. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +333 -27
  58. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +5 -0
  59. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +7 -0
  60. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +189 -27
  61. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +23 -10
  62. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +134 -90
  63. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +2 -2
  64. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +5 -3
  65. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -1
  66. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +124 -16
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +10 -0
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +7 -0
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +15 -0
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +11 -5
  71. package/deps/rocksdb/rocksdb/db/db_iter.cc +7 -8
  72. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +54 -3
  73. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +42 -0
  74. package/deps/rocksdb/rocksdb/db/db_options_test.cc +116 -1
  75. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +3 -2
  76. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +3 -2
  77. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +9 -8
  78. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +142 -63
  79. package/deps/rocksdb/rocksdb/db/db_test.cc +28 -7
  80. package/deps/rocksdb/rocksdb/db/db_test2.cc +71 -131
  81. package/deps/rocksdb/rocksdb/db/db_test_util.cc +18 -0
  82. package/deps/rocksdb/rocksdb/db/db_test_util.h +6 -0
  83. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +10 -10
  84. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +25 -0
  85. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +88 -0
  86. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +67 -0
  87. package/deps/rocksdb/rocksdb/db/db_write_test.cc +5 -0
  88. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
  89. package/deps/rocksdb/rocksdb/db/experimental.cc +4 -2
  90. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +86 -1
  91. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +15 -2
  92. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +1 -2
  93. package/deps/rocksdb/rocksdb/db/flush_job.cc +21 -14
  94. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +14 -7
  95. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +31 -8
  96. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +21 -19
  97. package/deps/rocksdb/rocksdb/db/internal_stats.cc +42 -12
  98. package/deps/rocksdb/rocksdb/db/internal_stats.h +1 -0
  99. package/deps/rocksdb/rocksdb/db/kv_checksum.h +92 -6
  100. package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -2
  101. package/deps/rocksdb/rocksdb/db/log_format.h +8 -4
  102. package/deps/rocksdb/rocksdb/db/log_reader.cc +129 -51
  103. package/deps/rocksdb/rocksdb/db/log_reader.h +16 -0
  104. package/deps/rocksdb/rocksdb/db/log_test.cc +125 -4
  105. package/deps/rocksdb/rocksdb/db/log_writer.cc +32 -2
  106. package/deps/rocksdb/rocksdb/db/log_writer.h +16 -0
  107. package/deps/rocksdb/rocksdb/db/memtable.cc +17 -46
  108. package/deps/rocksdb/rocksdb/db/memtable.h +1 -1
  109. package/deps/rocksdb/rocksdb/db/memtable_list.cc +8 -4
  110. package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -1
  111. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -1
  112. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +5 -4
  113. package/deps/rocksdb/rocksdb/db/repair.cc +38 -11
  114. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +3 -3
  115. package/deps/rocksdb/rocksdb/db/table_cache.cc +68 -51
  116. package/deps/rocksdb/rocksdb/db/table_cache.h +20 -10
  117. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -1
  118. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +6 -3
  119. package/deps/rocksdb/rocksdb/db/version_builder.cc +9 -5
  120. package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
  121. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +140 -120
  122. package/deps/rocksdb/rocksdb/db/version_edit.cc +14 -0
  123. package/deps/rocksdb/rocksdb/db/version_edit.h +12 -4
  124. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +21 -13
  125. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +26 -16
  126. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +9 -9
  127. package/deps/rocksdb/rocksdb/db/version_set.cc +292 -96
  128. package/deps/rocksdb/rocksdb/db/version_set.h +53 -28
  129. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -0
  130. package/deps/rocksdb/rocksdb/db/version_set_test.cc +62 -22
  131. package/deps/rocksdb/rocksdb/db/version_util.h +5 -4
  132. package/deps/rocksdb/rocksdb/db/write_batch.cc +3 -1
  133. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
  134. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +119 -27
  135. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +123 -0
  136. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  137. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +7 -2
  138. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +34 -0
  139. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +13 -0
  140. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +43 -33
  141. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +29 -17
  142. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
  143. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +6 -1
  144. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +85 -50
  145. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +96 -54
  146. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.cc +122 -0
  147. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +206 -0
  148. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +9 -1
  149. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +9 -3
  150. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +322 -92
  151. package/deps/rocksdb/rocksdb/env/env_posix.cc +12 -8
  152. package/deps/rocksdb/rocksdb/env/env_test.cc +31 -0
  153. package/deps/rocksdb/rocksdb/env/mock_env.cc +1 -1
  154. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +14 -0
  155. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -1
  156. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +5 -1
  157. package/deps/rocksdb/rocksdb/file/file_util.cc +3 -3
  158. package/deps/rocksdb/rocksdb/file/file_util.h +2 -0
  159. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +89 -0
  160. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +22 -7
  161. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -2
  162. package/deps/rocksdb/rocksdb/file/readahead_raf.cc +1 -1
  163. package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +1 -1
  164. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +1 -1
  165. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +3 -0
  166. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +154 -74
  167. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +27 -7
  168. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +107 -28
  169. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +19 -0
  170. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +8 -0
  171. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +2 -0
  172. package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +7 -1
  173. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +137 -152
  174. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +61 -26
  175. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +30 -26
  176. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +33 -16
  177. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +87 -8
  178. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +1 -1
  179. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +5 -0
  180. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
  181. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +1 -0
  182. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -0
  183. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +0 -1
  184. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  185. package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +9 -2
  186. package/deps/rocksdb/rocksdb/logging/env_logger.h +2 -0
  187. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +78 -42
  188. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +14 -9
  189. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +1 -0
  190. package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -0
  191. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +4 -9
  192. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +19 -11
  193. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +1 -1
  194. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +211 -555
  195. package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +1 -1
  196. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +36 -2
  197. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +17 -7
  198. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.h +10 -7
  199. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +19 -18
  200. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.h +10 -2
  201. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +14 -0
  202. package/deps/rocksdb/rocksdb/options/cf_options.cc +35 -2
  203. package/deps/rocksdb/rocksdb/options/cf_options.h +5 -0
  204. package/deps/rocksdb/rocksdb/options/customizable_test.cc +1 -1
  205. package/deps/rocksdb/rocksdb/options/options.cc +12 -53
  206. package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -0
  207. package/deps/rocksdb/rocksdb/options/options_parser.cc +11 -0
  208. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +32 -4
  209. package/deps/rocksdb/rocksdb/options/options_test.cc +89 -5
  210. package/deps/rocksdb/rocksdb/port/lang.h +27 -0
  211. package/deps/rocksdb/rocksdb/port/stack_trace.cc +67 -24
  212. package/deps/rocksdb/rocksdb/src.mk +2 -0
  213. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -3
  214. package/deps/rocksdb/rocksdb/table/block_based/block.cc +195 -35
  215. package/deps/rocksdb/rocksdb/table/block_based/block.h +197 -24
  216. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +71 -51
  217. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +7 -1
  218. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +4 -6
  219. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +3 -0
  220. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +43 -2
  221. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +36 -6
  222. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +266 -166
  223. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +44 -14
  224. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -1
  225. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +63 -56
  226. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +8 -2
  227. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +4 -2
  228. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +10 -0
  229. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +14 -2
  230. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +918 -2
  231. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -2
  232. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +10 -9
  233. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -8
  234. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +2 -2
  235. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +1 -1
  236. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +18 -23
  237. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +8 -8
  238. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +16 -32
  239. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +7 -8
  240. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +4 -5
  241. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +3 -3
  242. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +46 -53
  243. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +12 -12
  244. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +7 -9
  245. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +26 -23
  246. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +2 -1
  247. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +3 -0
  248. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +4 -2
  249. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +3 -2
  250. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +7 -1
  251. package/deps/rocksdb/rocksdb/table/block_fetcher.h +1 -1
  252. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
  253. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +3 -2
  254. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +5 -2
  255. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +4 -2
  256. package/deps/rocksdb/rocksdb/table/format.cc +4 -4
  257. package/deps/rocksdb/rocksdb/table/format.h +1 -1
  258. package/deps/rocksdb/rocksdb/table/get_context.cc +1 -1
  259. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +33 -22
  260. package/deps/rocksdb/rocksdb/table/meta_blocks.h +4 -0
  261. package/deps/rocksdb/rocksdb/table/mock_table.cc +4 -2
  262. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +1 -1
  263. package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +1 -1
  264. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +18 -10
  265. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +4 -3
  266. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +10 -7
  267. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +4 -2
  268. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +11 -0
  269. package/deps/rocksdb/rocksdb/table/table_builder.h +14 -5
  270. package/deps/rocksdb/rocksdb/table/table_properties.cc +2 -0
  271. package/deps/rocksdb/rocksdb/table/table_reader.h +6 -3
  272. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +1 -1
  273. package/deps/rocksdb/rocksdb/table/table_test.cc +291 -34
  274. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +3 -1
  275. package/deps/rocksdb/rocksdb/test_util/testharness.h +5 -0
  276. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -2
  277. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +33 -17
  278. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +3 -1
  279. package/deps/rocksdb/rocksdb/util/bloom_impl.h +2 -2
  280. package/deps/rocksdb/rocksdb/util/compression.h +1 -1
  281. package/deps/rocksdb/rocksdb/util/crc32c.cc +24 -83
  282. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +7 -9
  283. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +4 -1
  284. package/deps/rocksdb/rocksdb/util/filter_bench.cc +1 -1
  285. package/deps/rocksdb/rocksdb/util/gflags_compat.h +9 -10
  286. package/deps/rocksdb/rocksdb/util/math.h +12 -7
  287. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +16 -18
  288. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +46 -2
  289. package/deps/rocksdb/rocksdb/util/ribbon_test.cc +6 -6
  290. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +12 -7
  291. package/deps/rocksdb/rocksdb/util/stop_watch.h +31 -13
  292. package/deps/rocksdb/rocksdb/util/thread_list_test.cc +2 -0
  293. package/deps/rocksdb/rocksdb/util/thread_operation.h +2 -1
  294. package/deps/rocksdb/rocksdb/util/udt_util.h +77 -0
  295. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.cc +2 -2
  296. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +1 -1
  297. package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.cc +1 -1
  298. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +1 -1
  299. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -1
  300. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -1
  301. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +11 -1
  302. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +34 -1
  303. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +15 -0
  304. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +1 -1
  305. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +5 -1
  306. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +29 -1
  307. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +0 -1
  308. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +0 -1
  309. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +6 -1
  310. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +10 -0
  311. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +6 -1
  312. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +5 -0
  313. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +5 -0
  314. package/package.json +1 -1
  315. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  316. package/prebuilds/linux-x64/node.napi.node +0 -0
  317. /package/deps/rocksdb/rocksdb/memory/{memory_allocator.h → memory_allocator_impl.h} +0 -0
  318. /package/deps/rocksdb/rocksdb/monitoring/{statistics.h → statistics_impl.h} +0 -0
  319. /package/deps/rocksdb/rocksdb/table/block_based/{flush_block_policy.h → flush_block_policy_impl.h} +0 -0
  320. /package/deps/rocksdb/rocksdb/util/{rate_limiter.h → rate_limiter_impl.h} +0 -0
  321. /package/deps/rocksdb/rocksdb/utilities/agg_merge/{agg_merge.h → agg_merge_impl.h} +0 -0
@@ -110,47 +110,64 @@ class SstFileWriter {
110
110
  Status Open(const std::string& file_path);
111
111
 
112
112
  // Add a Put key with value to currently opened file (deprecated)
113
- // REQUIRES: key is after any previously added key according to comparator.
113
+ // REQUIRES: user_key is after any previously added point (Put/Merge/Delete)
114
+ // key according to the comparator.
114
115
  // REQUIRES: comparator is *not* timestamp-aware.
115
116
  ROCKSDB_DEPRECATED_FUNC Status Add(const Slice& user_key, const Slice& value);
116
117
 
117
118
  // Add a Put key with value to currently opened file
118
- // REQUIRES: key is after any previously added key according to comparator.
119
+ // REQUIRES: user_key is after any previously added point (Put/Merge/Delete)
120
+ // key according to the comparator.
119
121
  // REQUIRES: comparator is *not* timestamp-aware.
120
122
  Status Put(const Slice& user_key, const Slice& value);
121
123
 
122
124
  // Add a Put (key with timestamp, value) to the currently opened file
123
- // REQUIRES: key is after any previously added key according to the
124
- // comparator.
125
- // REQUIRES: the timestamp's size is equal to what is expected by
126
- // the comparator.
125
+ // REQUIRES: user_key is after any previously added point (Put/Merge/Delete)
126
+ // key according to the comparator.
127
+ // REQUIRES: timestamp's size is equal to what is expected by the comparator.
127
128
  Status Put(const Slice& user_key, const Slice& timestamp, const Slice& value);
128
129
 
129
130
  // Add a Merge key with value to currently opened file
130
- // REQUIRES: key is after any previously added key according to comparator.
131
+ // REQUIRES: user_key is after any previously added point (Put/Merge/Delete)
132
+ // key according to the comparator.
131
133
  // REQUIRES: comparator is *not* timestamp-aware.
132
134
  Status Merge(const Slice& user_key, const Slice& value);
133
135
 
134
136
  // Add a deletion key to currently opened file
135
- // REQUIRES: key is after any previously added key according to comparator.
137
+ // REQUIRES: user_key is after any previously added point (Put/Merge/Delete)
138
+ // key according to the comparator.
136
139
  // REQUIRES: comparator is *not* timestamp-aware.
137
140
  Status Delete(const Slice& user_key);
138
141
 
139
142
  // Add a deletion key with timestamp to the currently opened file
140
- // REQUIRES: key is after any previously added key according to the
141
- // comparator.
142
- // REQUIRES: the timestamp's size is equal to what is expected by
143
- // the comparator.
143
+ // REQUIRES: user_key is after any previously added point (Put/Merge/Delete)
144
+ // key according to the comparator.
145
+ // REQUIRES: timestamp's size is equal to what is expected by the comparator.
144
146
  Status Delete(const Slice& user_key, const Slice& timestamp);
145
147
 
146
- // Add a range deletion tombstone to currently opened file
148
+ // Add a range deletion tombstone to currently opened file. Such a range
149
+ // deletion tombstone does NOT delete point (Put/Merge/Delete) keys in the
150
+ // same file.
151
+ //
152
+ // Range deletion tombstones may be added in any order, both with respect to
153
+ // each other and with respect to the point (Put/Merge/Delete) keys in the
154
+ // same file.
155
+ //
156
+ // REQUIRES: The comparator orders `begin_key` at or before `end_key`
147
157
  // REQUIRES: comparator is *not* timestamp-aware.
148
158
  Status DeleteRange(const Slice& begin_key, const Slice& end_key);
149
159
 
150
- // Add a range deletion tombstone to currently opened file.
160
+ // Add a range deletion tombstone to currently opened file. Such a range
161
+ // deletion tombstone does NOT delete point (Put/Merge/Delete) keys in the
162
+ // same file.
163
+ //
164
+ // Range deletion tombstones may be added in any order, both with respect to
165
+ // each other and with respect to the point (Put/Merge/Delete) keys in the
166
+ // same file.
167
+ //
151
168
  // REQUIRES: begin_key and end_key are user keys without timestamp.
152
- // REQUIRES: the timestamp's size is equal to what is expected by
153
- // the comparator.
169
+ // REQUIRES: The comparator orders `begin_key` at or before `end_key`
170
+ // REQUIRES: timestamp's size is equal to what is expected by the comparator.
154
171
  Status DeleteRange(const Slice& begin_key, const Slice& end_key,
155
172
  const Slice& timestamp);
156
173
 
@@ -157,11 +157,16 @@ enum Tickers : uint32_t {
157
157
 
158
158
  NUMBER_MERGE_FAILURES,
159
159
 
160
- // number of times bloom was checked before creating iterator on a
161
- // file, and the number of times the check was useful in avoiding
162
- // iterator creation (and thus likely IOPs).
160
+ // Prefix filter stats when used for point lookups (Get / MultiGet).
161
+ // (For prefix filter stats on iterators, see *_LEVEL_SEEK_*.)
162
+ // Checked: filter was queried
163
163
  BLOOM_FILTER_PREFIX_CHECKED,
164
+ // Useful: filter returned false so prevented accessing data+index blocks
164
165
  BLOOM_FILTER_PREFIX_USEFUL,
166
+ // True positive: found a key matching the point query. When another key
167
+ // with the same prefix matches, it is considered a false positive by
168
+ // these statistics even though the filter returned a true positive.
169
+ BLOOM_FILTER_PREFIX_TRUE_POSITIVE,
165
170
 
166
171
  // Number of times we had to reseek inside an iteration to skip
167
172
  // over large number of keys with same userkey.
@@ -201,6 +206,7 @@ enum Tickers : uint32_t {
201
206
  NUMBER_BLOCK_COMPRESSED,
202
207
  NUMBER_BLOCK_DECOMPRESSED,
203
208
 
209
+ // DEPRECATED / unused (see NUMBER_BLOCK_COMPRESSION_*)
204
210
  NUMBER_BLOCK_NOT_COMPRESSED,
205
211
  MERGE_OPERATION_TOTAL_TIME,
206
212
  FILTER_OPERATION_TOTAL_TIME,
@@ -393,7 +399,39 @@ enum Tickers : uint32_t {
393
399
  NON_LAST_LEVEL_READ_BYTES,
394
400
  NON_LAST_LEVEL_READ_COUNT,
395
401
 
402
+ // Statistics on iterator Seek() (and variants) for each sorted run. I.e. a
403
+ // single user Seek() can result in many sorted run Seek()s.
404
+ // The stats are split between last level and non-last level.
405
+ // Filtered: a filter such as prefix Bloom filter indicates the Seek() would
406
+ // not find anything relevant, so avoided a likely access to data+index
407
+ // blocks.
408
+ LAST_LEVEL_SEEK_FILTERED,
409
+ // Filter match: a filter such as prefix Bloom filter was queried but did
410
+ // not filter out the seek.
411
+ LAST_LEVEL_SEEK_FILTER_MATCH,
412
+ // At least one data block was accessed for a Seek() (or variant) on a
413
+ // sorted run.
414
+ LAST_LEVEL_SEEK_DATA,
415
+ // At least one value() was accessed for the seek (suggesting it was useful),
416
+ // and no filter such as prefix Bloom was queried.
417
+ LAST_LEVEL_SEEK_DATA_USEFUL_NO_FILTER,
418
+ // At least one value() was accessed for the seek (suggesting it was useful),
419
+ // after querying a filter such as prefix Bloom.
420
+ LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH,
421
+ // The same set of stats, but for non-last level seeks.
422
+ NON_LAST_LEVEL_SEEK_FILTERED,
423
+ NON_LAST_LEVEL_SEEK_FILTER_MATCH,
424
+ NON_LAST_LEVEL_SEEK_DATA,
425
+ NON_LAST_LEVEL_SEEK_DATA_USEFUL_NO_FILTER,
426
+ NON_LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH,
427
+
428
+ // Number of block checksum verifications
396
429
  BLOCK_CHECKSUM_COMPUTE_COUNT,
430
+ // Number of times RocksDB detected a corruption while verifying a block
431
+ // checksum. RocksDB does not remember corruptions that happened during user
432
+ // reads so the same block corruption may be detected multiple times.
433
+ BLOCK_CHECKSUM_MISMATCH_COUNT,
434
+
397
435
  MULTIGET_COROUTINE_COUNT,
398
436
 
399
437
  // Integrated BlobDB specific stats
@@ -429,6 +467,42 @@ enum Tickers : uint32_t {
429
467
  // that finds its data for table open
430
468
  TABLE_OPEN_PREFETCH_TAIL_HIT,
431
469
 
470
+ // Statistics on the filtering by user-defined timestamps
471
+ // # of times timestamps are checked on accessing the table
472
+ TIMESTAMP_FILTER_TABLE_CHECKED,
473
+ // # of times timestamps can successfully help skip the table access
474
+ TIMESTAMP_FILTER_TABLE_FILTERED,
475
+
476
+ // Number of input bytes (uncompressed) to compression for SST blocks that
477
+ // are stored compressed.
478
+ BYTES_COMPRESSED_FROM,
479
+ // Number of output bytes (compressed) from compression for SST blocks that
480
+ // are stored compressed.
481
+ BYTES_COMPRESSED_TO,
482
+ // Number of uncompressed bytes for SST blocks that are stored uncompressed
483
+ // because compression type is kNoCompression, or some error case caused
484
+ // compression not to run or produce an output. Index blocks are only counted
485
+ // if enable_index_compression is true.
486
+ BYTES_COMPRESSION_BYPASSED,
487
+ // Number of input bytes (uncompressed) to compression for SST blocks that
488
+ // are stored uncompressed because the compression result was rejected,
489
+ // either because the ratio was not acceptable (see
490
+ // CompressionOptions::max_compressed_bytes_per_kb) or found invalid by the
491
+ // `verify_compression` option.
492
+ BYTES_COMPRESSION_REJECTED,
493
+
494
+ // Like BYTES_COMPRESSION_BYPASSED but counting number of blocks
495
+ NUMBER_BLOCK_COMPRESSION_BYPASSED,
496
+ // Like BYTES_COMPRESSION_REJECTED but counting number of blocks
497
+ NUMBER_BLOCK_COMPRESSION_REJECTED,
498
+
499
+ // Number of input bytes (compressed) to decompression in reading compressed
500
+ // SST blocks from storage.
501
+ BYTES_DECOMPRESSED_FROM,
502
+ // Number of output bytes (uncompressed) from decompression in reading
503
+ // compressed SST blocks from storage.
504
+ BYTES_DECOMPRESSED_TO,
505
+
432
506
  TICKER_ENUM_MAX
433
507
  };
434
508
 
@@ -466,7 +540,14 @@ enum Histograms : uint32_t {
466
540
  NUM_FILES_IN_SINGLE_COMPACTION,
467
541
  DB_SEEK,
468
542
  WRITE_STALL,
543
+ // Time spent in reading block-based or plain SST table
469
544
  SST_READ_MICROS,
545
+ // Time spent in reading SST table (currently only block-based table) or blob
546
+ // file corresponding to `Env::IOActivity`
547
+ FILE_READ_FLUSH_MICROS,
548
+ FILE_READ_COMPACTION_MICROS,
549
+ FILE_READ_DB_OPEN_MICROS,
550
+
470
551
  // The number of subcompactions actually scheduled during a compaction
471
552
  NUM_SUBCOMPACTIONS_SCHEDULED,
472
553
  // Value size distribution in each operation
@@ -474,10 +555,8 @@ enum Histograms : uint32_t {
474
555
  BYTES_PER_WRITE,
475
556
  BYTES_PER_MULTIGET,
476
557
 
477
- // number of bytes compressed/decompressed
478
- // number of bytes is when uncompressed; i.e. before/after respectively
479
- BYTES_COMPRESSED,
480
- BYTES_DECOMPRESSED,
558
+ BYTES_COMPRESSED, // DEPRECATED / unused (see BYTES_COMPRESSED_{FROM,TO})
559
+ BYTES_DECOMPRESSED, // DEPRECATED / unused (see BYTES_DECOMPRESSED_{FROM,TO})
481
560
  COMPRESSION_TIMES_NANOS,
482
561
  DECOMPRESSION_TIMES_NANOS,
483
562
  // Number of merge operands passed to the merge operator in user read
@@ -618,7 +697,7 @@ class Statistics : public Customizable {
618
697
  virtual void histogramData(uint32_t type,
619
698
  HistogramData* const data) const = 0;
620
699
  virtual std::string getHistogramString(uint32_t /*type*/) const { return ""; }
621
- virtual void recordTick(uint32_t tickerType, uint64_t count = 0) = 0;
700
+ virtual void recordTick(uint32_t tickerType, uint64_t count = 1) = 0;
622
701
  virtual void setTickerCount(uint32_t tickerType, uint64_t count) = 0;
623
702
  virtual uint64_t getAndResetTickerCount(uint32_t tickerType) = 0;
624
703
  virtual void reportTimeToHistogram(uint32_t histogramType, uint64_t time) {
@@ -259,7 +259,7 @@ struct BlockBasedTableOptions {
259
259
  bool no_block_cache = false;
260
260
 
261
261
  // If non-NULL use the specified cache for blocks.
262
- // If NULL, rocksdb will automatically create and use an 8MB internal cache.
262
+ // If NULL, rocksdb will automatically create and use a 32MB internal cache.
263
263
  std::shared_ptr<Cache> block_cache = nullptr;
264
264
 
265
265
  // If non-NULL use the specified cache for pages read from device
@@ -70,6 +70,7 @@ struct TablePropertiesNames {
70
70
  static const std::string kSlowCompressionEstimatedDataSize;
71
71
  static const std::string kFastCompressionEstimatedDataSize;
72
72
  static const std::string kSequenceNumberTimeMapping;
73
+ static const std::string kTailStartOffset;
73
74
  };
74
75
 
75
76
  // `TablePropertiesCollector` provides the mechanism for users to collect
@@ -239,6 +240,10 @@ struct TableProperties {
239
240
  // 0 means not exists.
240
241
  uint64_t external_sst_file_global_seqno_offset = 0;
241
242
 
243
+ // Offset where the "tail" part of SST file starts
244
+ // "Tail" refers to all blocks after data blocks till the end of the SST file
245
+ uint64_t tail_start_offset = 0;
246
+
242
247
  // DB identity
243
248
  // db_id is an identifier generated the first time the DB is created
244
249
  // If DB identity is unset or unassigned, `db_id` will be an empty string.
@@ -56,6 +56,7 @@ struct ThreadStatus {
56
56
  OP_UNKNOWN = 0,
57
57
  OP_COMPACTION,
58
58
  OP_FLUSH,
59
+ OP_DBOPEN,
59
60
  NUM_OP_TYPES
60
61
  };
61
62
 
@@ -96,6 +96,7 @@ Status GetLatestOptionsFileName(const std::string& dbpath, Env* env,
96
96
  // * prefix_extractor
97
97
  // * table_factory
98
98
  // * merge_operator
99
+ // * persist_user_defined_timestamps
99
100
  Status CheckOptionsCompatibility(
100
101
  const ConfigOptions& config_options, const std::string& dbpath,
101
102
  const DBOptions& db_options,
@@ -178,6 +178,13 @@ class StackableDB : public DB {
178
178
  import_options, metadata, handle);
179
179
  }
180
180
 
181
+ using DB::ClipColumnFamily;
182
+ virtual Status ClipColumnFamily(ColumnFamilyHandle* column_family,
183
+ const Slice& begin_key,
184
+ const Slice& end_key) override {
185
+ return db_->ClipColumnFamily(column_family, begin_key, end_key);
186
+ }
187
+
181
188
  using DB::VerifyFileChecksums;
182
189
  Status VerifyFileChecksums(const ReadOptions& read_opts) override {
183
190
  return db_->VerifyFileChecksums(read_opts);
@@ -681,4 +681,3 @@ class Transaction {
681
681
  };
682
682
 
683
683
  } // namespace ROCKSDB_NAMESPACE
684
-
@@ -12,8 +12,8 @@
12
12
  // NOTE: in 'main' development branch, this should be the *next*
13
13
  // minor or major version number planned for release.
14
14
  #define ROCKSDB_MAJOR 8
15
- #define ROCKSDB_MINOR 1
16
- #define ROCKSDB_PATCH 1
15
+ #define ROCKSDB_MINOR 3
16
+ #define ROCKSDB_PATCH 2
17
17
 
18
18
  // Do not use these. We made the mistake of declaring macros starting with
19
19
  // double underscore. Now we have to live with our choice. We'll deprecate these
@@ -81,13 +81,20 @@ class WriteBufferManager final {
81
81
  return buffer_size_.load(std::memory_order_relaxed);
82
82
  }
83
83
 
84
+ // REQUIRED: `new_size` > 0
84
85
  void SetBufferSize(size_t new_size) {
86
+ assert(new_size > 0);
85
87
  buffer_size_.store(new_size, std::memory_order_relaxed);
86
88
  mutable_limit_.store(new_size * 7 / 8, std::memory_order_relaxed);
87
89
  // Check if stall is active and can be ended.
88
90
  MaybeEndWriteStall();
89
91
  }
90
92
 
93
+ void SetAllowStall(bool new_allow_stall) {
94
+ allow_stall_.store(new_allow_stall, std::memory_order_relaxed);
95
+ MaybeEndWriteStall();
96
+ }
97
+
91
98
  // Below functions should be called by RocksDB internally.
92
99
 
93
100
  // Should only be called from write thread
@@ -117,7 +124,7 @@ class WriteBufferManager final {
117
124
  //
118
125
  // Should only be called by RocksDB internally.
119
126
  bool ShouldStall() const {
120
- if (!allow_stall_ || !enabled()) {
127
+ if (!allow_stall_.load(std::memory_order_relaxed) || !enabled()) {
121
128
  return false;
122
129
  }
123
130
 
@@ -165,7 +172,7 @@ class WriteBufferManager final {
165
172
  std::list<StallInterface*> queue_;
166
173
  // Protects the queue_ and stall_active_.
167
174
  std::mutex mu_;
168
- bool allow_stall_;
175
+ std::atomic<bool> allow_stall_;
169
176
  // Value should only be changed by BeginWriteStall() and MaybeEndWriteStall()
170
177
  // while holding mu_, but it can be read without a lock.
171
178
  std::atomic<bool> stall_active_;
@@ -76,6 +76,7 @@ class EnvLogger : public Logger {
76
76
  if (flush_pending_) {
77
77
  flush_pending_ = false;
78
78
  file_.Flush().PermitUncheckedError();
79
+ file_.reset_seen_error();
79
80
  }
80
81
  last_flush_micros_ = clock_->NowMicros();
81
82
  }
@@ -162,6 +163,7 @@ class EnvLogger : public Logger {
162
163
  FileOpGuard guard(*this);
163
164
  // We will ignore any error returned by Append().
164
165
  file_.Append(Slice(base, p - base)).PermitUncheckedError();
166
+ file_.reset_seen_error();
165
167
  flush_pending_ = true;
166
168
  const uint64_t now_micros = clock_->NowMicros();
167
169
  if (now_micros - last_flush_micros_ >= flush_every_seconds_ * 1000000) {
@@ -14,6 +14,8 @@
14
14
  #include "rocksdb/utilities/customizable_util.h"
15
15
  #include "rocksdb/utilities/object_registry.h"
16
16
  #include "rocksdb/utilities/options_type.h"
17
+ #include "util/fastrange.h"
18
+ #include "util/random.h"
17
19
  #include "util/string_util.h"
18
20
 
19
21
  namespace ROCKSDB_NAMESPACE {
@@ -35,6 +37,9 @@ static std::unordered_map<std::string, OptionTypeInfo> jemalloc_type_info = {
35
37
  {offsetof(struct JemallocAllocatorOptions, tcache_size_upper_bound),
36
38
  OptionType::kSizeT, OptionVerificationType::kNormal,
37
39
  OptionTypeFlags::kNone}},
40
+ {"num_arenas",
41
+ {offsetof(struct JemallocAllocatorOptions, num_arenas), OptionType::kSizeT,
42
+ OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
38
43
  };
39
44
  bool JemallocNodumpAllocator::IsSupported(std::string* why) {
40
45
  #ifndef ROCKSDB_JEMALLOC
@@ -59,11 +64,13 @@ bool JemallocNodumpAllocator::IsSupported(std::string* why) {
59
64
 
60
65
  JemallocNodumpAllocator::JemallocNodumpAllocator(
61
66
  JemallocAllocatorOptions& options)
62
- : options_(options),
67
+ : options_(options)
63
68
  #ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
64
- tcache_(&JemallocNodumpAllocator::DestroyThreadSpecificCache),
69
+ ,
70
+ tcache_(&JemallocNodumpAllocator::DestroyThreadSpecificCache) {
71
+ #else // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
72
+ {
65
73
  #endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
66
- arena_index_(0) {
67
74
  RegisterOptions(&options_, &jemalloc_type_info);
68
75
  }
69
76
 
@@ -75,9 +82,9 @@ JemallocNodumpAllocator::~JemallocNodumpAllocator() {
75
82
  for (void* tcache_index : tcache_list) {
76
83
  DestroyThreadSpecificCache(tcache_index);
77
84
  }
78
- if (arena_index_ > 0) {
85
+ for (auto arena_index : arena_indexes_) {
79
86
  // Destroy arena. Silently ignore error.
80
- Status s = DestroyArena(arena_index_);
87
+ Status s = DestroyArena(arena_index);
81
88
  assert(s.ok());
82
89
  s.PermitUncheckedError();
83
90
  }
@@ -90,7 +97,8 @@ size_t JemallocNodumpAllocator::UsableSize(void* p,
90
97
 
91
98
  void* JemallocNodumpAllocator::Allocate(size_t size) {
92
99
  int tcache_flag = GetThreadSpecificCache(size);
93
- return mallocx(size, MALLOCX_ARENA(arena_index_) | tcache_flag);
100
+ uint32_t arena_index = GetArenaIndex();
101
+ return mallocx(size, MALLOCX_ARENA(arena_index) | tcache_flag);
94
102
  }
95
103
 
96
104
  void JemallocNodumpAllocator::Deallocate(void* p) {
@@ -105,45 +113,71 @@ void JemallocNodumpAllocator::Deallocate(void* p) {
105
113
  dallocx(p, tcache_flag);
106
114
  }
107
115
 
108
- Status JemallocNodumpAllocator::InitializeArenas() {
109
- // Create arena.
110
- size_t arena_index_size = sizeof(arena_index_);
111
- int ret =
112
- mallctl("arenas.create", &arena_index_, &arena_index_size, nullptr, 0);
113
- if (ret != 0) {
114
- return Status::Incomplete("Failed to create jemalloc arena, error code: " +
115
- std::to_string(ret));
116
+ uint32_t JemallocNodumpAllocator::GetArenaIndex() const {
117
+ if (arena_indexes_.size() == 1) {
118
+ return arena_indexes_[0];
116
119
  }
117
- assert(arena_index_ != 0);
118
120
 
119
- // Read existing hooks.
120
- std::string key = "arena." + std::to_string(arena_index_) + ".extent_hooks";
121
- extent_hooks_t* hooks;
122
- size_t hooks_size = sizeof(hooks);
123
- ret = mallctl(key.c_str(), &hooks, &hooks_size, nullptr, 0);
124
- if (ret != 0) {
125
- return Status::Incomplete("Failed to read existing hooks, error code: " +
126
- std::to_string(ret));
127
- }
121
+ static std::atomic<uint32_t> next_seed = 0;
122
+ // Core-local may work in place of `thread_local` as we should be able to
123
+ // tolerate occasional stale reads in thread migration cases. However we need
124
+ // to make Random thread-safe and prevent cacheline bouncing. Whether this is
125
+ // worthwhile is still an open question.
126
+ thread_local Random tl_random(next_seed.fetch_add(1));
127
+ return arena_indexes_[FastRange32(tl_random.Next(), arena_indexes_.size())];
128
+ }
128
129
 
129
- // Store existing alloc.
130
- extent_alloc_t* original_alloc = hooks->alloc;
131
- extent_alloc_t* expected = nullptr;
132
- bool success =
133
- JemallocNodumpAllocator::original_alloc_.compare_exchange_strong(
134
- expected, original_alloc);
135
- if (!success && original_alloc != expected) {
136
- return Status::Incomplete("Original alloc conflict.");
137
- }
130
+ Status JemallocNodumpAllocator::InitializeArenas() {
131
+ assert(!init_);
132
+ init_ = true;
138
133
 
139
- // Set the custom hook.
140
- arena_hooks_.reset(new extent_hooks_t(*hooks));
141
- arena_hooks_->alloc = &JemallocNodumpAllocator::Alloc;
142
- extent_hooks_t* hooks_ptr = arena_hooks_.get();
143
- ret = mallctl(key.c_str(), nullptr, nullptr, &hooks_ptr, sizeof(hooks_ptr));
144
- if (ret != 0) {
145
- return Status::Incomplete("Failed to set custom hook, error code: " +
146
- std::to_string(ret));
134
+ for (size_t i = 0; i < options_.num_arenas; i++) {
135
+ // Create arena.
136
+ unsigned arena_index;
137
+ size_t arena_index_size = sizeof(arena_index);
138
+ int ret =
139
+ mallctl("arenas.create", &arena_index, &arena_index_size, nullptr, 0);
140
+ if (ret != 0) {
141
+ return Status::Incomplete(
142
+ "Failed to create jemalloc arena, error code: " +
143
+ std::to_string(ret));
144
+ }
145
+ arena_indexes_.push_back(arena_index);
146
+
147
+ // Read existing hooks.
148
+ std::string key =
149
+ "arena." + std::to_string(arena_indexes_[i]) + ".extent_hooks";
150
+ extent_hooks_t* hooks;
151
+ size_t hooks_size = sizeof(hooks);
152
+ ret = mallctl(key.c_str(), &hooks, &hooks_size, nullptr, 0);
153
+ if (ret != 0) {
154
+ return Status::Incomplete("Failed to read existing hooks, error code: " +
155
+ std::to_string(ret));
156
+ }
157
+
158
+ // Store existing alloc.
159
+ extent_alloc_t* original_alloc = hooks->alloc;
160
+ extent_alloc_t* expected = nullptr;
161
+ bool success =
162
+ JemallocNodumpAllocator::original_alloc_.compare_exchange_strong(
163
+ expected, original_alloc);
164
+ if (!success && original_alloc != expected) {
165
+ // This could happen if jemalloc creates new arenas with different initial
166
+ // values in their `alloc` function pointers. See `original_alloc_` API
167
+ // doc for more details.
168
+ return Status::Incomplete("Original alloc conflict.");
169
+ }
170
+
171
+ // Set the custom hook.
172
+ per_arena_hooks_.emplace_back();
173
+ per_arena_hooks_.back().reset(new extent_hooks_t(*hooks));
174
+ per_arena_hooks_.back()->alloc = &JemallocNodumpAllocator::Alloc;
175
+ extent_hooks_t* hooks_ptr = per_arena_hooks_.back().get();
176
+ ret = mallctl(key.c_str(), nullptr, nullptr, &hooks_ptr, sizeof(hooks_ptr));
177
+ if (ret != 0) {
178
+ return Status::Incomplete("Failed to set custom hook, error code: " +
179
+ std::to_string(ret));
180
+ }
147
181
  }
148
182
  return Status::OK();
149
183
  }
@@ -161,6 +195,8 @@ Status JemallocNodumpAllocator::PrepareOptions(
161
195
  options_.tcache_size_upper_bound) {
162
196
  return Status::InvalidArgument(
163
197
  "tcache_size_lower_bound larger or equal to tcache_size_upper_bound.");
198
+ } else if (options_.num_arenas < 1) {
199
+ return Status::InvalidArgument("num_arenas must be a positive integer");
164
200
  } else if (IsMutable()) {
165
201
  Status s = MemoryAllocator::PrepareOptions(config_options);
166
202
  #ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
@@ -221,7 +257,7 @@ void* JemallocNodumpAllocator::Alloc(extent_hooks_t* extent, void* new_addr,
221
257
  return result;
222
258
  }
223
259
 
224
- Status JemallocNodumpAllocator::DestroyArena(unsigned arena_index) {
260
+ Status JemallocNodumpAllocator::DestroyArena(uint32_t arena_index) {
225
261
  assert(arena_index != 0);
226
262
  std::string key = "arena." + std::to_string(arena_index) + ".destroy";
227
263
  int ret = mallctl(key.c_str(), nullptr, 0, nullptr, 0);
@@ -24,6 +24,10 @@
24
24
  #endif // ROCKSDB_JEMALLOC && ROCKSDB_PLATFORM_POSIX
25
25
 
26
26
  namespace ROCKSDB_NAMESPACE {
27
+
28
+ // Allocation requests are randomly sharded across
29
+ // `JemallocAllocatorOptions::num_arenas` arenas to reduce contention on per-
30
+ // arena mutexes.
27
31
  class JemallocNodumpAllocator : public BaseMemoryAllocator {
28
32
  public:
29
33
  explicit JemallocNodumpAllocator(JemallocAllocatorOptions& options);
@@ -38,7 +42,7 @@ class JemallocNodumpAllocator : public BaseMemoryAllocator {
38
42
  return IsSupported(&unused);
39
43
  }
40
44
  static bool IsSupported(std::string* why);
41
- bool IsMutable() const { return arena_index_ == 0; }
45
+ bool IsMutable() const { return !init_; }
42
46
 
43
47
  Status PrepareOptions(const ConfigOptions& config_options) override;
44
48
 
@@ -52,9 +56,7 @@ class JemallocNodumpAllocator : public BaseMemoryAllocator {
52
56
  #ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
53
57
  Status InitializeArenas();
54
58
 
55
- friend Status NewJemallocNodumpAllocator(
56
- JemallocAllocatorOptions& options,
57
- std::shared_ptr<MemoryAllocator>* memory_allocator);
59
+ uint32_t GetArenaIndex() const;
58
60
 
59
61
  // Custom alloc hook to replace jemalloc default alloc.
60
62
  static void* Alloc(extent_hooks_t* extent, void* new_addr, size_t size,
@@ -62,7 +64,7 @@ class JemallocNodumpAllocator : public BaseMemoryAllocator {
62
64
  unsigned arena_ind);
63
65
 
64
66
  // Destroy arena on destruction of the allocator, or on failure.
65
- static Status DestroyArena(unsigned arena_index);
67
+ static Status DestroyArena(uint32_t arena_index);
66
68
 
67
69
  // Destroy tcache on destruction of the allocator, or thread exit.
68
70
  static void DestroyThreadSpecificCache(void* ptr);
@@ -78,17 +80,20 @@ class JemallocNodumpAllocator : public BaseMemoryAllocator {
78
80
  // NewJemallocNodumpAllocator is thread-safe.
79
81
  //
80
82
  // Hack: original_alloc_ needs to be static for Alloc() to access it.
81
- // alloc needs to be static to pass to jemalloc as function pointer.
83
+ // alloc needs to be static to pass to jemalloc as function pointer. We can
84
+ // use a single process-wide value as long as we assume that any newly created
85
+ // arena has the same original value in its `alloc` function pointer.
82
86
  static std::atomic<extent_alloc_t*> original_alloc_;
83
87
 
84
88
  // Custom hooks have to outlive the corresponding arena.
85
- std::unique_ptr<extent_hooks_t> arena_hooks_;
89
+ std::vector<std::unique_ptr<extent_hooks_t>> per_arena_hooks_;
86
90
 
87
91
  // Hold thread-local tcache index.
88
92
  ThreadLocalPtr tcache_;
93
+
94
+ std::vector<uint32_t> arena_indexes_;
89
95
  #endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
90
96
 
91
- // Arena index.
92
- unsigned arena_index_;
97
+ bool init_ = false;
93
98
  };
94
99
  } // namespace ROCKSDB_NAMESPACE
@@ -576,6 +576,7 @@ static void ConcurrentReader(void* arg) {
576
576
  state->t_.ReadStep(&rnd);
577
577
  ++reads;
578
578
  }
579
+ (void)reads;
579
580
  state->Change(TestState::DONE);
580
581
  }
581
582
 
@@ -348,6 +348,7 @@ static void ConcurrentReader(void* arg) {
348
348
  state->t_.ReadStep(&rnd);
349
349
  ++reads;
350
350
  }
351
+ (void)reads;
351
352
  state->Change(TestState::DONE);
352
353
  }
353
354