@nxtedition/rocksdb 5.2.21 → 5.2.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (923):
  1. package/binding.cc +510 -967
  2. package/binding.gyp +78 -72
  3. package/chained-batch.js +1 -2
  4. package/deps/rocksdb/build_version.cc +70 -4
  5. package/deps/rocksdb/rocksdb/CMakeLists.txt +281 -149
  6. package/deps/rocksdb/rocksdb/Makefile +459 -469
  7. package/deps/rocksdb/rocksdb/TARGETS +5244 -1500
  8. package/deps/rocksdb/rocksdb/cache/cache.cc +12 -3
  9. package/deps/rocksdb/rocksdb/cache/cache_bench.cc +7 -368
  10. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +924 -0
  11. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +128 -0
  12. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.h +103 -0
  13. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +183 -0
  14. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +11 -0
  15. package/deps/rocksdb/rocksdb/cache/cache_key.cc +344 -0
  16. package/deps/rocksdb/rocksdb/cache/cache_key.h +132 -0
  17. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +183 -0
  18. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +288 -0
  19. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +468 -0
  20. package/deps/rocksdb/rocksdb/cache/cache_test.cc +85 -8
  21. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +121 -51
  22. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +171 -0
  23. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +86 -0
  24. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +607 -0
  25. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +381 -154
  26. package/deps/rocksdb/rocksdb/cache/lru_cache.h +176 -33
  27. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1659 -3
  28. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +94 -23
  29. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +49 -28
  30. package/deps/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake +7 -0
  31. package/deps/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake +29 -0
  32. package/deps/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake +29 -0
  33. package/deps/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake +29 -0
  34. package/deps/rocksdb/rocksdb/cmake/modules/FindTBB.cmake +33 -0
  35. package/deps/rocksdb/rocksdb/cmake/modules/Findgflags.cmake +29 -0
  36. package/deps/rocksdb/rocksdb/cmake/modules/Findlz4.cmake +29 -0
  37. package/deps/rocksdb/rocksdb/cmake/modules/Finduring.cmake +26 -0
  38. package/deps/rocksdb/rocksdb/cmake/modules/Findzstd.cmake +29 -0
  39. package/deps/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake +10 -0
  40. package/deps/rocksdb/rocksdb/crash_test.mk +93 -0
  41. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +54 -31
  42. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +10 -6
  43. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +146 -0
  44. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator_test.cc +326 -0
  45. package/deps/rocksdb/rocksdb/db/blob/blob_fetcher.cc +34 -0
  46. package/deps/rocksdb/rocksdb/db/blob/blob_fetcher.h +37 -0
  47. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition.cc +4 -2
  48. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc +8 -4
  49. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +99 -40
  50. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +20 -8
  51. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +95 -83
  52. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +13 -10
  53. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +7 -4
  54. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +37 -37
  55. package/deps/rocksdb/rocksdb/db/blob/blob_file_completion_callback.h +101 -0
  56. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.cc +8 -1
  57. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.h +6 -0
  58. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +209 -44
  59. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +37 -11
  60. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +382 -179
  61. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter.cc +100 -0
  62. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter.h +102 -0
  63. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter_test.cc +196 -0
  64. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +3 -0
  65. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.h +2 -1
  66. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +7 -5
  67. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.h +10 -3
  68. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +12 -8
  69. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.h +5 -5
  70. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +772 -9
  71. package/deps/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc +730 -0
  72. package/deps/rocksdb/rocksdb/db/blob/db_blob_corruption_test.cc +82 -0
  73. package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +155 -17
  74. package/deps/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.cc +21 -0
  75. package/deps/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.h +38 -0
  76. package/deps/rocksdb/rocksdb/db/builder.cc +137 -89
  77. package/deps/rocksdb/rocksdb/db/builder.h +16 -37
  78. package/deps/rocksdb/rocksdb/db/c.cc +413 -208
  79. package/deps/rocksdb/rocksdb/db/c_test.c +227 -138
  80. package/deps/rocksdb/rocksdb/db/column_family.cc +118 -103
  81. package/deps/rocksdb/rocksdb/db/column_family.h +86 -44
  82. package/deps/rocksdb/rocksdb/db/column_family_test.cc +38 -24
  83. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +81 -0
  84. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +275 -0
  85. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator_test.cc +258 -0
  86. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +81 -28
  87. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +43 -12
  88. package/deps/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h +12 -0
  89. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +406 -215
  90. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +147 -50
  91. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +167 -61
  92. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +1321 -156
  93. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +197 -28
  94. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -3
  95. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +246 -43
  96. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +65 -26
  97. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +7 -7
  98. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +122 -9
  99. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -2
  100. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +18 -6
  101. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -1
  102. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +536 -44
  103. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +311 -30
  104. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +1 -1
  105. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +849 -0
  106. package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +92 -0
  107. package/deps/rocksdb/rocksdb/db/compaction/sst_partitioner.cc +46 -0
  108. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/convenience.cc +6 -3
  110. package/deps/rocksdb/rocksdb/db/corruption_test.cc +383 -28
  111. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +7 -2
  112. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +154 -45
  113. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +1095 -33
  114. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +1249 -203
  115. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +135 -9
  116. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +1348 -166
  117. package/deps/rocksdb/rocksdb/db/db_dynamic_level_test.cc +3 -5
  118. package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +1 -1
  119. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +312 -45
  120. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +1734 -48
  121. package/deps/rocksdb/rocksdb/db/{compacted_db_impl.cc → db_impl/compacted_db_impl.cc} +24 -7
  122. package/deps/rocksdb/rocksdb/db/{compacted_db_impl.h → db_impl/compacted_db_impl.h} +1 -1
  123. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +644 -333
  124. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +365 -92
  125. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +578 -210
  126. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +38 -16
  127. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +17 -10
  128. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +75 -74
  129. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +450 -183
  130. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +42 -9
  131. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +232 -15
  132. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +42 -4
  133. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +297 -100
  134. package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +16 -15
  135. package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +31 -1
  136. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +6 -5
  137. package/deps/rocksdb/rocksdb/db/db_iter.cc +218 -153
  138. package/deps/rocksdb/rocksdb/db/db_iter.h +14 -12
  139. package/deps/rocksdb/rocksdb/db/db_iter_stress_test.cc +1 -1
  140. package/deps/rocksdb/rocksdb/db/db_iter_test.cc +84 -160
  141. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +47 -6
  142. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +204 -0
  143. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +21 -13
  144. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +17 -10
  145. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +38 -24
  146. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +184 -19
  147. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +1 -1
  148. package/deps/rocksdb/rocksdb/db/db_options_test.cc +183 -3
  149. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +409 -9
  150. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +92 -23
  151. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +446 -0
  152. package/deps/rocksdb/rocksdb/db/{db_impl/db_secondary_test.cc → db_secondary_test.cc} +363 -35
  153. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +520 -15
  154. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +50 -1
  155. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +139 -4
  156. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +1 -1
  157. package/deps/rocksdb/rocksdb/db/db_test.cc +669 -359
  158. package/deps/rocksdb/rocksdb/db/db_test2.cc +2110 -304
  159. package/deps/rocksdb/rocksdb/db/db_test_util.cc +76 -43
  160. package/deps/rocksdb/rocksdb/db/db_test_util.h +231 -103
  161. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +19 -11
  162. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +490 -71
  163. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +980 -349
  164. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +11 -12
  165. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +793 -0
  166. package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -1
  167. package/deps/rocksdb/rocksdb/db/dbformat.cc +4 -12
  168. package/deps/rocksdb/rocksdb/db/dbformat.h +28 -18
  169. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +3 -0
  170. package/deps/rocksdb/rocksdb/db/deletefile_test.cc +50 -15
  171. package/deps/rocksdb/rocksdb/db/error_handler.cc +127 -41
  172. package/deps/rocksdb/rocksdb/db/error_handler.h +12 -5
  173. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +524 -255
  174. package/deps/rocksdb/rocksdb/db/event_helpers.cc +136 -11
  175. package/deps/rocksdb/rocksdb/db/event_helpers.h +27 -2
  176. package/deps/rocksdb/rocksdb/db/experimental.cc +100 -0
  177. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +307 -4
  178. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +137 -60
  179. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +12 -8
  180. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -55
  181. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +86 -5
  182. package/deps/rocksdb/rocksdb/db/filename_test.cc +63 -0
  183. package/deps/rocksdb/rocksdb/db/flush_job.cc +619 -64
  184. package/deps/rocksdb/rocksdb/db/flush_job.h +30 -7
  185. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +33 -16
  186. package/deps/rocksdb/rocksdb/db/flush_scheduler.h +2 -1
  187. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +18 -17
  188. package/deps/rocksdb/rocksdb/db/forward_iterator.h +5 -4
  189. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +0 -1
  190. package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +91 -0
  191. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +25 -14
  192. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +6 -5
  193. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +1 -1
  194. package/deps/rocksdb/rocksdb/db/internal_stats.cc +471 -50
  195. package/deps/rocksdb/rocksdb/db/internal_stats.h +129 -25
  196. package/deps/rocksdb/rocksdb/db/job_context.h +22 -9
  197. package/deps/rocksdb/rocksdb/db/kv_checksum.h +394 -0
  198. package/deps/rocksdb/rocksdb/db/listener_test.cc +518 -41
  199. package/deps/rocksdb/rocksdb/db/log_format.h +4 -1
  200. package/deps/rocksdb/rocksdb/db/log_reader.cc +129 -6
  201. package/deps/rocksdb/rocksdb/db/log_reader.h +17 -1
  202. package/deps/rocksdb/rocksdb/db/log_test.cc +161 -11
  203. package/deps/rocksdb/rocksdb/db/log_writer.cc +92 -13
  204. package/deps/rocksdb/rocksdb/db/log_writer.h +18 -5
  205. package/deps/rocksdb/rocksdb/db/logs_with_prep_tracker.h +1 -1
  206. package/deps/rocksdb/rocksdb/db/lookup_key.h +0 -1
  207. package/deps/rocksdb/rocksdb/db/malloc_stats.cc +2 -2
  208. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +21 -8
  209. package/deps/rocksdb/rocksdb/db/memtable.cc +144 -54
  210. package/deps/rocksdb/rocksdb/db/memtable.h +72 -15
  211. package/deps/rocksdb/rocksdb/db/memtable_list.cc +95 -47
  212. package/deps/rocksdb/rocksdb/db/memtable_list.h +33 -13
  213. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +61 -31
  214. package/deps/rocksdb/rocksdb/db/merge_context.h +20 -8
  215. package/deps/rocksdb/rocksdb/db/merge_helper.cc +54 -11
  216. package/deps/rocksdb/rocksdb/db/merge_helper.h +17 -6
  217. package/deps/rocksdb/rocksdb/db/merge_helper_test.cc +13 -7
  218. package/deps/rocksdb/rocksdb/db/merge_test.cc +40 -19
  219. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +14 -25
  220. package/deps/rocksdb/rocksdb/db/output_validator.cc +3 -0
  221. package/deps/rocksdb/rocksdb/db/output_validator.h +5 -4
  222. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +32 -28
  223. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +43 -29
  224. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +9 -7
  225. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler_test.cc +21 -16
  226. package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +1 -1
  227. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +29 -36
  228. package/deps/rocksdb/rocksdb/db/pre_release_callback.h +1 -2
  229. package/deps/rocksdb/rocksdb/db/prefix_test.cc +4 -4
  230. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +2 -2
  231. package/deps/rocksdb/rocksdb/db/range_del_aggregator_bench.cc +11 -11
  232. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +3 -2
  233. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +14 -8
  234. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +17 -0
  235. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +4 -2
  236. package/deps/rocksdb/rocksdb/db/read_callback.h +1 -0
  237. package/deps/rocksdb/rocksdb/db/repair.cc +87 -58
  238. package/deps/rocksdb/rocksdb/db/repair_test.cc +35 -5
  239. package/deps/rocksdb/rocksdb/db/snapshot_impl.h +2 -1
  240. package/deps/rocksdb/rocksdb/db/table_cache.cc +95 -69
  241. package/deps/rocksdb/rocksdb/db/table_cache.h +63 -53
  242. package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +4 -4
  243. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +78 -10
  244. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +28 -33
  245. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +30 -51
  246. package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +12 -8
  247. package/deps/rocksdb/rocksdb/db/version_builder.cc +564 -341
  248. package/deps/rocksdb/rocksdb/db/version_builder.h +8 -8
  249. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +327 -155
  250. package/deps/rocksdb/rocksdb/db/version_edit.cc +89 -27
  251. package/deps/rocksdb/rocksdb/db/version_edit.h +42 -17
  252. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +324 -43
  253. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +79 -22
  254. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +165 -20
  255. package/deps/rocksdb/rocksdb/db/version_set.cc +935 -1034
  256. package/deps/rocksdb/rocksdb/db/version_set.h +183 -122
  257. package/deps/rocksdb/rocksdb/db/version_set_test.cc +556 -138
  258. package/deps/rocksdb/rocksdb/db/version_util.h +68 -0
  259. package/deps/rocksdb/rocksdb/db/wal_manager.cc +23 -21
  260. package/deps/rocksdb/rocksdb/db/wal_manager.h +5 -2
  261. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +30 -27
  262. package/deps/rocksdb/rocksdb/db/write_batch.cc +704 -209
  263. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +135 -2
  264. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +209 -5
  265. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +2 -0
  266. package/deps/rocksdb/rocksdb/db/write_controller.cc +47 -54
  267. package/deps/rocksdb/rocksdb/db/write_controller.h +12 -9
  268. package/deps/rocksdb/rocksdb/db/write_controller_test.cc +215 -103
  269. package/deps/rocksdb/rocksdb/db/write_thread.cc +11 -0
  270. package/deps/rocksdb/rocksdb/db/write_thread.h +14 -8
  271. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +7 -4
  272. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +10 -3
  273. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +6 -0
  274. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +1 -1
  275. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -2
  276. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +78 -25
  277. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +13 -2
  278. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +29 -12
  279. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +5 -1
  280. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +199 -32
  281. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.cc +188 -0
  282. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +59 -10
  283. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +77 -109
  284. package/deps/rocksdb/rocksdb/{third-party/folly/folly/synchronization/WaitOptions.cpp → db_stress_tool/db_stress_stat.cc} +9 -4
  285. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +7 -6
  286. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_table_properties_collector.h +1 -0
  287. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +699 -143
  288. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +20 -2
  289. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +49 -39
  290. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +631 -0
  291. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +287 -0
  292. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +1565 -0
  293. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +374 -0
  294. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +149 -18
  295. package/deps/rocksdb/rocksdb/env/composite_env.cc +464 -0
  296. package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +98 -646
  297. package/deps/rocksdb/rocksdb/env/emulated_clock.h +114 -0
  298. package/deps/rocksdb/rocksdb/env/env.cc +632 -42
  299. package/deps/rocksdb/rocksdb/env/env_basic_test.cc +84 -36
  300. package/deps/rocksdb/rocksdb/env/env_chroot.cc +88 -286
  301. package/deps/rocksdb/rocksdb/env/env_chroot.h +34 -1
  302. package/deps/rocksdb/rocksdb/env/env_encryption.cc +469 -277
  303. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +9 -30
  304. package/deps/rocksdb/rocksdb/env/env_posix.cc +110 -119
  305. package/deps/rocksdb/rocksdb/env/env_test.cc +1128 -39
  306. package/deps/rocksdb/rocksdb/env/file_system.cc +147 -8
  307. package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +207 -136
  308. package/deps/rocksdb/rocksdb/env/file_system_tracer.h +86 -54
  309. package/deps/rocksdb/rocksdb/env/fs_posix.cc +192 -64
  310. package/deps/rocksdb/rocksdb/env/fs_readonly.h +107 -0
  311. package/deps/rocksdb/rocksdb/env/fs_remap.cc +339 -0
  312. package/deps/rocksdb/rocksdb/env/fs_remap.h +139 -0
  313. package/deps/rocksdb/rocksdb/env/io_posix.cc +245 -41
  314. package/deps/rocksdb/rocksdb/env/io_posix.h +66 -1
  315. package/deps/rocksdb/rocksdb/env/mock_env.cc +147 -149
  316. package/deps/rocksdb/rocksdb/env/mock_env.h +113 -11
  317. package/deps/rocksdb/rocksdb/env/mock_env_test.cc +2 -4
  318. package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +164 -0
  319. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +71 -0
  320. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +9 -5
  321. package/deps/rocksdb/rocksdb/file/delete_scheduler.h +6 -4
  322. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +19 -12
  323. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +459 -70
  324. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +205 -28
  325. package/deps/rocksdb/rocksdb/file/file_util.cc +39 -28
  326. package/deps/rocksdb/rocksdb/file/file_util.h +18 -27
  327. package/deps/rocksdb/rocksdb/file/filename.cc +59 -22
  328. package/deps/rocksdb/rocksdb/file/filename.h +13 -8
  329. package/deps/rocksdb/rocksdb/file/line_file_reader.cc +68 -0
  330. package/deps/rocksdb/rocksdb/file/line_file_reader.h +59 -0
  331. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +1130 -6
  332. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +220 -36
  333. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +69 -17
  334. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +13 -12
  335. package/deps/rocksdb/rocksdb/file/read_write_util.cc +3 -38
  336. package/deps/rocksdb/rocksdb/file/read_write_util.h +0 -4
  337. package/deps/rocksdb/rocksdb/file/readahead_file_info.h +33 -0
  338. package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +57 -9
  339. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +58 -6
  340. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +29 -54
  341. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +22 -29
  342. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +424 -50
  343. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +66 -19
  344. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +157 -66
  345. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +224 -121
  346. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +333 -30
  347. package/deps/rocksdb/rocksdb/include/rocksdb/cache_bench_tool.h +14 -0
  348. package/deps/rocksdb/rocksdb/include/rocksdb/cleanable.h +1 -1
  349. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +90 -50
  350. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +13 -5
  351. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +20 -4
  352. package/deps/rocksdb/rocksdb/include/rocksdb/concurrent_task_limiter.h +8 -3
  353. package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +53 -12
  354. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +31 -6
  355. package/deps/rocksdb/rocksdb/include/rocksdb/customizable.h +102 -7
  356. package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +51 -0
  357. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +370 -262
  358. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +286 -87
  359. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +124 -64
  360. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +27 -0
  361. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +21 -4
  362. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +384 -41
  363. package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +111 -143
  364. package/deps/rocksdb/rocksdb/include/rocksdb/flush_block_policy.h +20 -6
  365. package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +56 -0
  366. package/deps/rocksdb/rocksdb/include/rocksdb/io_status.h +15 -33
  367. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +37 -1
  368. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -3
  369. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +314 -26
  370. package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +11 -7
  371. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +50 -15
  372. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +10 -3
  373. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +186 -96
  374. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +373 -103
  375. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +13 -3
  376. package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +2 -2
  377. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +37 -7
  378. package/deps/rocksdb/rocksdb/include/rocksdb/rocksdb_namespace.h +6 -0
  379. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +87 -0
  380. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +5 -12
  381. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +59 -30
  382. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +11 -11
  383. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +22 -0
  384. package/deps/rocksdb/rocksdb/include/rocksdb/sst_partitioner.h +17 -10
  385. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +121 -41
  386. package/deps/rocksdb/rocksdb/include/rocksdb/stats_history.h +1 -0
  387. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +114 -136
  388. package/deps/rocksdb/rocksdb/include/rocksdb/system_clock.h +116 -0
  389. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +160 -18
  390. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +57 -15
  391. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +3 -1
  392. package/deps/rocksdb/rocksdb/include/rocksdb/trace_reader_writer.h +10 -6
  393. package/deps/rocksdb/rocksdb/include/rocksdb/trace_record.h +247 -0
  394. package/deps/rocksdb/rocksdb/include/rocksdb/trace_record_result.h +187 -0
  395. package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +1 -1
  396. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +14 -24
  397. package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +46 -0
  398. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +14 -4
  399. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/agg_merge.h +138 -0
  400. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +631 -0
  401. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +142 -0
  402. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +12 -9
  403. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/customizable_util.h +368 -0
  404. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -0
  405. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h +4 -0
  406. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/object_registry.h +418 -63
  407. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +143 -73
  408. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +2 -2
  409. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/replayer.h +87 -0
  410. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/sim_cache.h +2 -2
  411. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +43 -5
  412. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +18 -23
  413. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +26 -0
  414. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +32 -6
  415. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db_mutex.h +1 -2
  416. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +20 -1
  417. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +30 -3
  418. package/deps/rocksdb/rocksdb/include/rocksdb/wal_filter.h +11 -2
  419. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +89 -11
  420. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +11 -0
  421. package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +108 -38
  422. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +40 -23
  423. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.h +12 -5
  424. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +100 -49
  425. package/deps/rocksdb/rocksdb/logging/env_logger.h +7 -5
  426. package/deps/rocksdb/rocksdb/logging/env_logger_test.cc +0 -1
  427. package/deps/rocksdb/rocksdb/logging/posix_logger.h +3 -9
  428. package/deps/rocksdb/rocksdb/memory/arena.cc +3 -1
  429. package/deps/rocksdb/rocksdb/memory/arena.h +1 -1
  430. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +171 -106
  431. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +31 -15
  432. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.cc +15 -4
  433. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.h +24 -8
  434. package/deps/rocksdb/rocksdb/memory/memory_allocator.cc +91 -0
  435. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +239 -0
  436. package/deps/rocksdb/rocksdb/memory/memory_usage.h +14 -1
  437. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.cc +72 -9
  438. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.cc +52 -6
  439. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +53 -0
  440. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +5 -5
  441. package/deps/rocksdb/rocksdb/memtable/memtablerep_bench.cc +17 -5
  442. package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -1
  443. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +87 -0
  444. package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +20 -10
  445. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +148 -94
  446. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +160 -62
  447. package/deps/rocksdb/rocksdb/microbench/CMakeLists.txt +17 -0
  448. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +1360 -0
  449. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +153 -0
  450. package/deps/rocksdb/rocksdb/monitoring/histogram.cc +8 -15
  451. package/deps/rocksdb/rocksdb/monitoring/histogram.h +0 -1
  452. package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +18 -16
  453. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.cc +9 -7
  454. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.h +5 -3
  455. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +7 -5
  456. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +37 -12
  457. package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +26 -6
  458. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +6 -10
  459. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +14 -13
  460. package/deps/rocksdb/rocksdb/monitoring/perf_context_imp.h +19 -20
  461. package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +18 -18
  462. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +84 -2
  463. package/deps/rocksdb/rocksdb/monitoring/statistics.h +6 -0
  464. package/deps/rocksdb/rocksdb/monitoring/statistics_test.cc +47 -2
  465. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +67 -54
  466. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +4 -1
  467. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +2 -1
  468. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -2
  469. package/deps/rocksdb/rocksdb/options/cf_options.cc +280 -212
  470. package/deps/rocksdb/rocksdb/options/cf_options.h +51 -57
  471. package/deps/rocksdb/rocksdb/options/configurable.cc +242 -138
  472. package/deps/rocksdb/rocksdb/options/configurable_helper.h +4 -68
  473. package/deps/rocksdb/rocksdb/options/configurable_test.cc +144 -21
  474. package/deps/rocksdb/rocksdb/options/configurable_test.h +2 -3
  475. package/deps/rocksdb/rocksdb/options/customizable.cc +67 -7
  476. package/deps/rocksdb/rocksdb/options/customizable_test.cc +1773 -151
  477. package/deps/rocksdb/rocksdb/options/db_options.cc +275 -47
  478. package/deps/rocksdb/rocksdb/options/db_options.h +36 -7
  479. package/deps/rocksdb/rocksdb/options/options.cc +49 -17
  480. package/deps/rocksdb/rocksdb/options/options_helper.cc +369 -352
  481. package/deps/rocksdb/rocksdb/options/options_helper.h +23 -23
  482. package/deps/rocksdb/rocksdb/options/options_parser.cc +18 -13
  483. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +67 -54
  484. package/deps/rocksdb/rocksdb/options/options_test.cc +1162 -187
  485. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +1 -1
  486. package/deps/rocksdb/rocksdb/port/lang.h +52 -0
  487. package/deps/rocksdb/rocksdb/port/port_example.h +1 -1
  488. package/deps/rocksdb/rocksdb/port/port_posix.cc +31 -2
  489. package/deps/rocksdb/rocksdb/port/port_posix.h +20 -2
  490. package/deps/rocksdb/rocksdb/port/stack_trace.cc +20 -4
  491. package/deps/rocksdb/rocksdb/port/sys_time.h +2 -2
  492. package/deps/rocksdb/rocksdb/port/win/env_default.cc +7 -7
  493. package/deps/rocksdb/rocksdb/port/win/env_win.cc +44 -74
  494. package/deps/rocksdb/rocksdb/port/win/env_win.h +25 -23
  495. package/deps/rocksdb/rocksdb/port/win/io_win.cc +32 -34
  496. package/deps/rocksdb/rocksdb/port/win/io_win.h +12 -6
  497. package/deps/rocksdb/rocksdb/port/win/port_win.cc +55 -35
  498. package/deps/rocksdb/rocksdb/port/win/port_win.h +22 -5
  499. package/deps/rocksdb/rocksdb/port/win/win_logger.cc +3 -3
  500. package/deps/rocksdb/rocksdb/port/win/win_logger.h +3 -5
  501. package/deps/rocksdb/rocksdb/port/win/win_thread.cc +7 -1
  502. package/deps/rocksdb/rocksdb/port/win/win_thread.h +12 -17
  503. package/deps/rocksdb/rocksdb/python.mk +9 -0
  504. package/deps/rocksdb/rocksdb/src.mk +82 -34
  505. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -4
  506. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +1 -1
  507. package/deps/rocksdb/rocksdb/table/block_based/block.cc +158 -80
  508. package/deps/rocksdb/rocksdb/table/block_based/block.h +64 -36
  509. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +23 -14
  510. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.h +13 -5
  511. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc +3 -218
  512. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +603 -328
  513. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +28 -22
  514. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +220 -82
  515. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +8 -2
  516. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +3 -4
  517. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +28 -4
  518. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +598 -492
  519. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +151 -96
  520. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +31 -58
  521. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +330 -92
  522. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +50 -19
  523. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +23 -0
  524. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +226 -0
  525. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +56 -22
  526. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +42 -4
  527. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +5 -2
  528. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +2 -0
  529. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +34 -20
  530. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +9 -10
  531. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +26 -3
  532. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +2 -1
  533. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +844 -202
  534. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +281 -81
  535. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +62 -2
  536. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.h +2 -3
  537. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -7
  538. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +22 -6
  539. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +28 -26
  540. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
  541. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +1 -2
  542. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +2 -1
  543. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +11 -4
  544. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.cc +2 -1
  545. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +2 -0
  546. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +68 -26
  547. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +44 -9
  548. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +12 -10
  549. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +3 -4
  550. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.h +23 -4
  551. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +44 -19
  552. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +5 -1
  553. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +16 -28
  554. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +7 -4
  555. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +2 -2
  556. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +77 -57
  557. package/deps/rocksdb/rocksdb/table/block_fetcher.h +23 -12
  558. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +43 -56
  559. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +8 -8
  560. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.h +2 -1
  561. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +52 -70
  562. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.cc +5 -8
  563. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +1 -1
  564. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +17 -11
  565. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +2 -3
  566. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +42 -51
  567. package/deps/rocksdb/rocksdb/table/format.cc +258 -104
  568. package/deps/rocksdb/rocksdb/table/format.h +120 -109
  569. package/deps/rocksdb/rocksdb/table/get_context.cc +97 -65
  570. package/deps/rocksdb/rocksdb/table/get_context.h +19 -12
  571. package/deps/rocksdb/rocksdb/table/internal_iterator.h +14 -0
  572. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +8 -0
  573. package/deps/rocksdb/rocksdb/table/merger_test.cc +3 -2
  574. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +11 -21
  575. package/deps/rocksdb/rocksdb/table/merging_iterator.h +3 -3
  576. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +176 -171
  577. package/deps/rocksdb/rocksdb/table/meta_blocks.h +47 -33
  578. package/deps/rocksdb/rocksdb/table/mock_table.cc +7 -9
  579. package/deps/rocksdb/rocksdb/table/mock_table.h +3 -2
  580. package/deps/rocksdb/rocksdb/table/multiget_context.h +15 -8
  581. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +22 -29
  582. package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +6 -3
  583. package/deps/rocksdb/rocksdb/table/plain/plain_table_bloom.h +5 -8
  584. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +29 -26
  585. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +12 -16
  586. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.cc +145 -69
  587. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +1 -1
  588. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.cc +7 -6
  589. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.h +3 -4
  590. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +3 -1
  591. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.h +1 -1
  592. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +13 -18
  593. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +4 -9
  594. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +55 -37
  595. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +10 -5
  596. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +11 -8
  597. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +222 -16
  598. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +106 -58
  599. package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +6 -5
  600. package/deps/rocksdb/rocksdb/table/table_builder.h +68 -44
  601. package/deps/rocksdb/rocksdb/table/table_factory.cc +37 -10
  602. package/deps/rocksdb/rocksdb/table/table_properties.cc +109 -54
  603. package/deps/rocksdb/rocksdb/table/table_properties_internal.h +4 -20
  604. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +33 -32
  605. package/deps/rocksdb/rocksdb/table/table_reader_caller.h +2 -0
  606. package/deps/rocksdb/rocksdb/table/table_test.cc +989 -326
  607. package/deps/rocksdb/rocksdb/table/two_level_iterator.cc +4 -0
  608. package/deps/rocksdb/rocksdb/table/unique_id.cc +166 -0
  609. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +59 -0
  610. package/deps/rocksdb/rocksdb/test_util/mock_time_env.cc +1 -1
  611. package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +13 -10
  612. package/deps/rocksdb/rocksdb/test_util/sync_point.cc +1 -2
  613. package/deps/rocksdb/rocksdb/test_util/sync_point.h +35 -16
  614. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.cc +32 -10
  615. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.h +31 -4
  616. package/deps/rocksdb/rocksdb/test_util/testharness.cc +53 -1
  617. package/deps/rocksdb/rocksdb/test_util/testharness.h +67 -3
  618. package/deps/rocksdb/rocksdb/test_util/testutil.cc +236 -66
  619. package/deps/rocksdb/rocksdb/test_util/testutil.h +63 -100
  620. package/deps/rocksdb/rocksdb/test_util/transaction_test_util.cc +12 -1
  621. package/deps/rocksdb/rocksdb/tools/blob_dump.cc +2 -2
  622. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.cc +6 -3
  623. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.h +1 -0
  624. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +9 -3
  625. package/deps/rocksdb/rocksdb/tools/db_bench.cc +1 -1
  626. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +1420 -611
  627. package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +11 -8
  628. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +11 -1
  629. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +4 -2
  630. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_tool.cc +46 -22
  631. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +655 -179
  632. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +58 -6
  633. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +472 -29
  634. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +23 -2
  635. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +2 -2
  636. package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.cc +246 -0
  637. package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.h +126 -0
  638. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +83 -29
  639. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +38 -17
  640. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +191 -55
  641. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +219 -296
  642. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.h +87 -53
  643. package/deps/rocksdb/rocksdb/tools/write_stress.cc +8 -7
  644. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +6 -5
  645. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +5 -4
  646. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer_test.cc +14 -9
  647. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.cc +134 -60
  648. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.h +49 -38
  649. package/deps/rocksdb/rocksdb/trace_replay/io_tracer_test.cc +152 -15
  650. package/deps/rocksdb/rocksdb/trace_replay/trace_record.cc +206 -0
  651. package/deps/rocksdb/rocksdb/trace_replay/trace_record_handler.cc +190 -0
  652. package/deps/rocksdb/rocksdb/trace_replay/trace_record_handler.h +46 -0
  653. package/deps/rocksdb/rocksdb/trace_replay/trace_record_result.cc +146 -0
  654. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +475 -344
  655. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.h +83 -95
  656. package/deps/rocksdb/rocksdb/util/autovector.h +38 -18
  657. package/deps/rocksdb/rocksdb/util/autovector_test.cc +1 -1
  658. package/deps/rocksdb/rocksdb/util/bloom_impl.h +4 -0
  659. package/deps/rocksdb/rocksdb/util/bloom_test.cc +276 -94
  660. package/deps/rocksdb/rocksdb/util/build_version.cc.in +81 -4
  661. package/deps/rocksdb/rocksdb/util/cast_util.h +22 -0
  662. package/deps/rocksdb/rocksdb/util/channel.h +2 -0
  663. package/deps/rocksdb/rocksdb/util/coding.h +1 -33
  664. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +8 -0
  665. package/deps/rocksdb/rocksdb/util/comparator.cc +163 -3
  666. package/deps/rocksdb/rocksdb/util/compression.cc +122 -0
  667. package/deps/rocksdb/rocksdb/util/compression.h +212 -7
  668. package/deps/rocksdb/rocksdb/util/compression_context_cache.cc +1 -3
  669. package/deps/rocksdb/rocksdb/util/crc32c.cc +165 -2
  670. package/deps/rocksdb/rocksdb/util/crc32c.h +6 -0
  671. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +14 -0
  672. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +3 -0
  673. package/deps/rocksdb/rocksdb/util/crc32c_test.cc +47 -0
  674. package/deps/rocksdb/rocksdb/util/defer.h +30 -1
  675. package/deps/rocksdb/rocksdb/util/defer_test.cc +11 -0
  676. package/deps/rocksdb/rocksdb/util/duplicate_detector.h +3 -1
  677. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +3 -3
  678. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +5 -4
  679. package/deps/rocksdb/rocksdb/util/fastrange.h +2 -0
  680. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +36 -0
  681. package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +3 -1
  682. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +512 -52
  683. package/deps/rocksdb/rocksdb/util/filter_bench.cc +65 -10
  684. package/deps/rocksdb/rocksdb/util/gflags_compat.h +6 -1
  685. package/deps/rocksdb/rocksdb/util/hash.cc +121 -3
  686. package/deps/rocksdb/rocksdb/util/hash.h +31 -1
  687. package/deps/rocksdb/rocksdb/util/hash128.h +26 -0
  688. package/deps/rocksdb/rocksdb/util/hash_containers.h +51 -0
  689. package/deps/rocksdb/rocksdb/util/hash_test.cc +194 -2
  690. package/deps/rocksdb/rocksdb/util/heap.h +6 -1
  691. package/deps/rocksdb/rocksdb/util/kv_map.h +1 -1
  692. package/deps/rocksdb/rocksdb/util/log_write_bench.cc +8 -6
  693. package/deps/rocksdb/rocksdb/util/math.h +74 -7
  694. package/deps/rocksdb/rocksdb/util/math128.h +13 -1
  695. package/deps/rocksdb/rocksdb/util/murmurhash.h +3 -3
  696. package/deps/rocksdb/rocksdb/util/random.cc +9 -0
  697. package/deps/rocksdb/rocksdb/util/random.h +6 -0
  698. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +298 -144
  699. package/deps/rocksdb/rocksdb/util/rate_limiter.h +68 -19
  700. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +335 -23
  701. package/deps/rocksdb/rocksdb/util/repeatable_thread.h +10 -12
  702. package/deps/rocksdb/rocksdb/util/repeatable_thread_test.cc +18 -15
  703. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +98 -74
  704. package/deps/rocksdb/rocksdb/util/ribbon_config.cc +506 -0
  705. package/deps/rocksdb/rocksdb/util/ribbon_config.h +182 -0
  706. package/deps/rocksdb/rocksdb/util/ribbon_impl.h +154 -79
  707. package/deps/rocksdb/rocksdb/util/ribbon_test.cc +742 -365
  708. package/deps/rocksdb/rocksdb/util/set_comparator.h +2 -0
  709. package/deps/rocksdb/rocksdb/util/slice.cc +198 -35
  710. package/deps/rocksdb/rocksdb/util/slice_test.cc +30 -1
  711. package/deps/rocksdb/rocksdb/util/status.cc +32 -29
  712. package/deps/rocksdb/rocksdb/util/stop_watch.h +18 -18
  713. package/deps/rocksdb/rocksdb/util/string_util.cc +85 -6
  714. package/deps/rocksdb/rocksdb/util/string_util.h +47 -2
  715. package/deps/rocksdb/rocksdb/util/thread_guard.h +41 -0
  716. package/deps/rocksdb/rocksdb/util/thread_local.h +2 -2
  717. package/deps/rocksdb/rocksdb/util/thread_local_test.cc +22 -24
  718. package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +7 -6
  719. package/deps/rocksdb/rocksdb/util/timer.h +55 -46
  720. package/deps/rocksdb/rocksdb/util/timer_test.cc +50 -48
  721. package/deps/rocksdb/rocksdb/util/user_comparator_wrapper.h +4 -0
  722. package/deps/rocksdb/rocksdb/util/vector_iterator.h +31 -15
  723. package/deps/rocksdb/rocksdb/util/work_queue.h +2 -0
  724. package/deps/rocksdb/rocksdb/util/xxhash.cc +35 -1144
  725. package/deps/rocksdb/rocksdb/util/xxhash.h +5117 -373
  726. package/deps/rocksdb/rocksdb/util/xxph3.h +1762 -0
  727. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.cc +238 -0
  728. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.h +49 -0
  729. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +134 -0
  730. package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.cc +104 -0
  731. package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.h +47 -0
  732. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +3164 -0
  733. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_impl.h +29 -0
  734. package/deps/rocksdb/rocksdb/utilities/{backupable/backupable_db_test.cc → backup/backup_engine_test.cc} +1679 -485
  735. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +6 -4
  736. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +14 -9
  737. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +2 -0
  738. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +1 -0
  739. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_gc_stats.h +4 -0
  740. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +37 -27
  741. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +8 -4
  742. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +1 -1
  743. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_iterator.h +13 -10
  744. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +5 -0
  745. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +44 -25
  746. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +3 -4
  747. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +27 -19
  748. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +4 -2
  749. package/deps/rocksdb/rocksdb/utilities/cache_dump_load.cc +69 -0
  750. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +489 -0
  751. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +366 -0
  752. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.cc +67 -4
  753. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.h +21 -6
  754. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +107 -7
  755. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_options.h +43 -0
  756. package/deps/rocksdb/rocksdb/utilities/cassandra/format.h +1 -1
  757. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.cc +24 -8
  758. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.h +7 -7
  759. package/deps/rocksdb/rocksdb/utilities/cassandra/serialize.h +5 -0
  760. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +99 -218
  761. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +8 -24
  762. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +114 -1
  763. package/deps/rocksdb/rocksdb/utilities/compaction_filters/layered_compaction_filter_base.h +6 -2
  764. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc +0 -4
  765. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h +7 -6
  766. package/deps/rocksdb/rocksdb/utilities/compaction_filters.cc +56 -0
  767. package/deps/rocksdb/rocksdb/utilities/convenience/info_log_finder.cc +2 -2
  768. package/deps/rocksdb/rocksdb/utilities/counted_fs.cc +355 -0
  769. package/deps/rocksdb/rocksdb/utilities/counted_fs.h +152 -0
  770. package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +13 -0
  771. package/deps/rocksdb/rocksdb/utilities/env_timed.cc +164 -122
  772. package/deps/rocksdb/rocksdb/utilities/env_timed.h +97 -0
  773. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +75 -17
  774. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +19 -3
  775. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +539 -126
  776. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +162 -17
  777. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +110 -0
  778. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +94 -0
  779. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +5 -2
  780. package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +104 -0
  781. package/deps/rocksdb/rocksdb/utilities/merge_operators/bytesxor.h +5 -3
  782. package/deps/rocksdb/rocksdb/utilities/merge_operators/max.cc +4 -1
  783. package/deps/rocksdb/rocksdb/utilities/merge_operators/put.cc +11 -3
  784. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.cc +0 -2
  785. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.h +5 -1
  786. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.cc +29 -10
  787. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.h +6 -3
  788. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.cc +29 -14
  789. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.h +6 -3
  790. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +71 -18
  791. package/deps/rocksdb/rocksdb/utilities/merge_operators/uint64add.cc +15 -9
  792. package/deps/rocksdb/rocksdb/utilities/merge_operators.cc +120 -0
  793. package/deps/rocksdb/rocksdb/utilities/merge_operators.h +3 -23
  794. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +267 -42
  795. package/deps/rocksdb/rocksdb/utilities/object_registry_test.cc +702 -76
  796. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -1
  797. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +26 -5
  798. package/deps/rocksdb/rocksdb/utilities/options/options_util.cc +1 -1
  799. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +124 -1
  800. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.cc +2 -3
  801. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.h +8 -9
  802. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +15 -13
  803. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +1 -1
  804. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.h +4 -4
  805. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_evictable.h +2 -2
  806. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_bench.cc +8 -9
  807. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -1
  808. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.h +6 -3
  809. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.h +2 -2
  810. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +3 -0
  811. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator_test.cc +2 -0
  812. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +43 -35
  813. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +20 -18
  814. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +107 -2
  815. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +23 -15
  816. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.h +2 -2
  817. package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.cc +316 -0
  818. package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.h +86 -0
  819. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +4 -5
  820. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +4 -3
  821. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
  822. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +119 -3
  823. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc +20 -3
  824. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h +20 -0
  825. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h +3 -2
  826. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +4 -0
  827. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc +38 -14
  828. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h +17 -10
  829. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +1 -0
  830. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +1 -2
  831. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +423 -34
  832. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +82 -2
  833. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +72 -40
  834. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +32 -1
  835. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +13 -5
  836. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +7 -3
  837. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +207 -43
  838. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +50 -7
  839. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +28 -10
  840. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +11 -6
  841. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +516 -0
  842. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +506 -15
  843. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +27 -13
  844. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +14 -14
  845. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +3 -0
  846. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +2 -2
  847. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +14 -5
  848. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +305 -27
  849. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +55 -159
  850. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +209 -2
  851. package/deps/rocksdb/rocksdb/utilities/wal_filter.cc +23 -0
  852. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +157 -88
  853. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +501 -114
  854. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +91 -316
  855. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +1212 -672
  856. package/deps/rocksdb/rocksdb.gyp +425 -446
  857. package/index.js +5 -87
  858. package/package-lock.json +23687 -0
  859. package/package.json +8 -9
  860. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  861. package/prebuilds/darwin-x64/node.napi.node +0 -0
  862. package/prebuilds/{darwin-x64+arm64 → linux-x64}/node.napi.node +0 -0
  863. package/deps/rocksdb/rocksdb/README.md +0 -32
  864. package/deps/rocksdb/rocksdb/env/env_hdfs.cc +0 -648
  865. package/deps/rocksdb/rocksdb/hdfs/README +0 -23
  866. package/deps/rocksdb/rocksdb/hdfs/env_hdfs.h +0 -386
  867. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backupable_db.h +0 -535
  868. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_librados.h +0 -175
  869. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/utility_db.h +0 -34
  870. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator_test.cc +0 -102
  871. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.h +0 -49
  872. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.h +0 -44
  873. package/deps/rocksdb/rocksdb/options/customizable_helper.h +0 -216
  874. package/deps/rocksdb/rocksdb/port/README +0 -10
  875. package/deps/rocksdb/rocksdb/third-party/folly/folly/CPortability.h +0 -27
  876. package/deps/rocksdb/rocksdb/third-party/folly/folly/ConstexprMath.h +0 -45
  877. package/deps/rocksdb/rocksdb/third-party/folly/folly/Indestructible.h +0 -166
  878. package/deps/rocksdb/rocksdb/third-party/folly/folly/Optional.h +0 -570
  879. package/deps/rocksdb/rocksdb/third-party/folly/folly/Portability.h +0 -92
  880. package/deps/rocksdb/rocksdb/third-party/folly/folly/ScopeGuard.h +0 -54
  881. package/deps/rocksdb/rocksdb/third-party/folly/folly/Traits.h +0 -152
  882. package/deps/rocksdb/rocksdb/third-party/folly/folly/Unit.h +0 -59
  883. package/deps/rocksdb/rocksdb/third-party/folly/folly/Utility.h +0 -141
  884. package/deps/rocksdb/rocksdb/third-party/folly/folly/chrono/Hardware.h +0 -33
  885. package/deps/rocksdb/rocksdb/third-party/folly/folly/container/Array.h +0 -74
  886. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex-inl.h +0 -117
  887. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.cpp +0 -263
  888. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.h +0 -96
  889. package/deps/rocksdb/rocksdb/third-party/folly/folly/functional/Invoke.h +0 -40
  890. package/deps/rocksdb/rocksdb/third-party/folly/folly/hash/Hash.h +0 -29
  891. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Align.h +0 -144
  892. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Bits.h +0 -30
  893. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Launder.h +0 -51
  894. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/Asm.h +0 -28
  895. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysSyscall.h +0 -10
  896. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysTypes.h +0 -26
  897. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification-inl.h +0 -138
  898. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.cpp +0 -23
  899. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.h +0 -57
  900. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil-inl.h +0 -260
  901. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil.h +0 -52
  902. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/Baton.h +0 -328
  903. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex-inl.h +0 -1703
  904. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.cpp +0 -16
  905. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.h +0 -304
  906. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutexSpecializations.h +0 -39
  907. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.cpp +0 -26
  908. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.h +0 -318
  909. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.h +0 -57
  910. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/InlineFunctionRef.h +0 -219
  911. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable-inl.h +0 -207
  912. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable.h +0 -164
  913. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Sleeper.h +0 -57
  914. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Spin.h +0 -77
  915. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/test/DistributedMutexTest.cpp +0 -1145
  916. package/deps/rocksdb/rocksdb/util/build_version.h +0 -15
  917. package/deps/rocksdb/rocksdb/util/xxh3p.h +0 -1392
  918. package/deps/rocksdb/rocksdb/utilities/backupable/backupable_db.cc +0 -2354
  919. package/deps/rocksdb/rocksdb/utilities/env_librados.cc +0 -1497
  920. package/deps/rocksdb/rocksdb/utilities/env_librados_test.cc +0 -1146
  921. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +0 -13
  922. package/deps/snappy/snappy-1.1.7/README.md +0 -149
  923. package/prebuilds/linux-x64/node.napi.glibc.node +0 -0
@@ -18,25 +18,35 @@
18
18
  #include <stdio.h>
19
19
  #include <stdlib.h>
20
20
  #include <sys/types.h>
21
+ #ifdef __APPLE__
22
+ #include <mach/host_info.h>
23
+ #include <mach/mach_host.h>
24
+ #include <sys/sysctl.h>
25
+ #endif
26
+ #ifdef __FreeBSD__
27
+ #include <sys/sysctl.h>
28
+ #endif
21
29
  #include <atomic>
22
30
  #include <cinttypes>
23
31
  #include <condition_variable>
24
32
  #include <cstddef>
33
+ #include <iostream>
25
34
  #include <memory>
26
35
  #include <mutex>
36
+ #include <queue>
27
37
  #include <thread>
28
38
  #include <unordered_map>
29
39
 
30
40
  #include "db/db_impl/db_impl.h"
31
41
  #include "db/malloc_stats.h"
32
42
  #include "db/version_set.h"
33
- #include "hdfs/env_hdfs.h"
34
43
  #include "monitoring/histogram.h"
35
44
  #include "monitoring/statistics.h"
36
45
  #include "options/cf_options.h"
37
46
  #include "port/port.h"
38
47
  #include "port/stack_trace.h"
39
48
  #include "rocksdb/cache.h"
49
+ #include "rocksdb/convenience.h"
40
50
  #include "rocksdb/db.h"
41
51
  #include "rocksdb/env.h"
42
52
  #include "rocksdb/filter_policy.h"
@@ -45,21 +55,29 @@
45
55
  #include "rocksdb/perf_context.h"
46
56
  #include "rocksdb/persistent_cache.h"
47
57
  #include "rocksdb/rate_limiter.h"
58
+ #include "rocksdb/secondary_cache.h"
48
59
  #include "rocksdb/slice.h"
49
60
  #include "rocksdb/slice_transform.h"
50
61
  #include "rocksdb/stats_history.h"
62
+ #include "rocksdb/table.h"
51
63
  #include "rocksdb/utilities/object_registry.h"
52
64
  #include "rocksdb/utilities/optimistic_transaction_db.h"
65
+ #include "rocksdb/utilities/options_type.h"
53
66
  #include "rocksdb/utilities/options_util.h"
67
+ #ifndef ROCKSDB_LITE
68
+ #include "rocksdb/utilities/replayer.h"
69
+ #endif // ROCKSDB_LITE
54
70
  #include "rocksdb/utilities/sim_cache.h"
55
71
  #include "rocksdb/utilities/transaction.h"
56
72
  #include "rocksdb/utilities/transaction_db.h"
57
73
  #include "rocksdb/write_batch.h"
58
74
  #include "test_util/testutil.h"
59
75
  #include "test_util/transaction_test_util.h"
76
+ #include "tools/simulated_hybrid_file_system.h"
60
77
  #include "util/cast_util.h"
61
78
  #include "util/compression.h"
62
79
  #include "util/crc32c.h"
80
+ #include "util/file_checksum_helper.h"
63
81
  #include "util/gflags_compat.h"
64
82
  #include "util/mutexlock.h"
65
83
  #include "util/random.h"
@@ -67,6 +85,7 @@
67
85
  #include "util/string_util.h"
68
86
  #include "util/xxhash.h"
69
87
  #include "utilities/blob_db/blob_db.h"
88
+ #include "utilities/counted_fs.h"
70
89
  #include "utilities/merge_operators.h"
71
90
  #include "utilities/merge_operators/bytesxor.h"
72
91
  #include "utilities/merge_operators/sortlist.h"
@@ -84,6 +103,12 @@ using GFLAGS_NAMESPACE::ParseCommandLineFlags;
84
103
  using GFLAGS_NAMESPACE::RegisterFlagValidator;
85
104
  using GFLAGS_NAMESPACE::SetUsageMessage;
86
105
 
106
+ #ifdef ROCKSDB_LITE
107
+ #define IF_ROCKSDB_LITE(Then, Else) Then
108
+ #else
109
+ #define IF_ROCKSDB_LITE(Then, Else) Else
110
+ #endif
111
+
87
112
  DEFINE_string(
88
113
  benchmarks,
89
114
  "fillseq,"
@@ -102,6 +127,12 @@ DEFINE_string(
102
127
  "readreverse,"
103
128
  "compact,"
104
129
  "compactall,"
130
+ "flush,"
131
+ IF_ROCKSDB_LITE("",
132
+ "compact0,"
133
+ "compact1,"
134
+ "waitforcompaction,"
135
+ )
105
136
  "multireadrandom,"
106
137
  "mixgraph,"
107
138
  "readseq,"
@@ -119,6 +150,8 @@ DEFINE_string(
119
150
  "fill100K,"
120
151
  "crc32c,"
121
152
  "xxhash,"
153
+ "xxhash64,"
154
+ "xxh3,"
122
155
  "compress,"
123
156
  "uncompress,"
124
157
  "acquireload,"
@@ -127,6 +160,7 @@ DEFINE_string(
127
160
  "randomreplacekeys,"
128
161
  "timeseries,"
129
162
  "getmergeoperands",
163
+ "readrandomoperands,"
130
164
 
131
165
  "Comma-separated list of operations to run in the specified"
132
166
  " order. Available benchmarks:\n"
@@ -177,8 +211,10 @@ DEFINE_string(
177
211
  "overwrite\n"
178
212
  "\tseekrandomwhilemerging -- seekrandom and 1 thread doing "
179
213
  "merge\n"
180
- "\tcrc32c -- repeated crc32c of 4K of data\n"
181
- "\txxhash -- repeated xxHash of 4K of data\n"
214
+ "\tcrc32c -- repeated crc32c of <block size> data\n"
215
+ "\txxhash -- repeated xxHash of <block size> data\n"
216
+ "\txxhash64 -- repeated xxHash64 of <block size> data\n"
217
+ "\txxh3 -- repeated XXH3 of <block size> data\n"
182
218
  "\tacquireload -- load N*1000 times\n"
183
219
  "\tfillseekseq -- write N values in sequential key, then read "
184
220
  "them by seeking to each key\n"
@@ -191,18 +227,30 @@ DEFINE_string(
191
227
  "Meta operations:\n"
192
228
  "\tcompact -- Compact the entire DB; If multiple, randomly choose one\n"
193
229
  "\tcompactall -- Compact the entire DB\n"
230
+ IF_ROCKSDB_LITE("",
231
+ "\tcompact0 -- compact L0 into L1\n"
232
+ "\tcompact1 -- compact L1 into L2\n"
233
+ "\twaitforcompaction - pause until compaction is (probably) done\n"
234
+ )
235
+ "\tflush - flush the memtable\n"
194
236
  "\tstats -- Print DB stats\n"
195
237
  "\tresetstats -- Reset DB stats\n"
196
238
  "\tlevelstats -- Print the number of files and bytes per level\n"
239
+ "\tmemstats -- Print memtable stats\n"
197
240
  "\tsstables -- Print sstable info\n"
198
241
  "\theapprofile -- Dump a heap profile (if supported by this port)\n"
242
+ IF_ROCKSDB_LITE("",
199
243
  "\treplay -- replay the trace file specified with trace_file\n"
244
+ )
200
245
  "\tgetmergeoperands -- Insert lots of merge records which are a list of "
201
246
  "sorted ints for a key and then compare performance of lookup for another "
202
- "key "
203
- "by doing a Get followed by binary searching in the large sorted list vs "
204
- "doing a GetMergeOperands and binary searching in the operands which are"
205
- "sorted sub-lists. The MergeOperator used is sortlist.h\n");
247
+ "key by doing a Get followed by binary searching in the large sorted list "
248
+ "vs doing a GetMergeOperands and binary searching in the operands which "
249
+ "are sorted sub-lists. The MergeOperator used is sortlist.h\n"
250
+ "\treadrandomoperands -- read random keys using `GetMergeOperands()`. An "
251
+ "operation includes a rare but possible retry in case it got "
252
+ "`Status::Incomplete()`. This happens upon encountering more keys than "
253
+ "have ever been seen by the thread (or eight initially)\n");
206
254
 
207
255
  DEFINE_int64(num, 1000000, "Number of key/values to place in database");
208
256
 
@@ -241,8 +289,10 @@ DEFINE_int64(deletes, -1, "Number of delete operations to do. "
241
289
 
242
290
  DEFINE_int32(bloom_locality, 0, "Control bloom filter probes locality");
243
291
 
244
- DEFINE_int64(seed, 0, "Seed base for random number generators. "
245
- "When 0 it is deterministic.");
292
+ DEFINE_int64(seed, 0,
293
+ "Seed base for random number generators. "
294
+ "When 0 it is derived from the current time.");
295
+ static int64_t seed_base;
246
296
 
247
297
  DEFINE_int32(threads, 1, "Number of concurrent threads to run.");
248
298
 
@@ -300,6 +350,58 @@ DEFINE_int32(num_multi_db, 0,
300
350
  DEFINE_double(compression_ratio, 0.5, "Arrange to generate values that shrink"
301
351
  " to this fraction of their original size after compression");
302
352
 
353
+ DEFINE_double(
354
+ overwrite_probability, 0.0,
355
+ "Used in 'filluniquerandom' benchmark: for each write operation, "
356
+ "we give a probability to perform an overwrite instead. The key used for "
357
+ "the overwrite is randomly chosen from the last 'overwrite_window_size' "
358
+ "keys previously inserted into the DB. "
359
+ "Valid overwrite_probability values: [0.0, 1.0].");
360
+
361
+ DEFINE_uint32(overwrite_window_size, 1,
362
+ "Used in 'filluniquerandom' benchmark. For each write operation,"
363
+ " when the overwrite_probability flag is set by the user, the "
364
+ "key used to perform an overwrite is randomly chosen from the "
365
+ "last 'overwrite_window_size' keys previously inserted into DB. "
366
+ "Warning: large values can affect throughput. "
367
+ "Valid overwrite_window_size values: [1, kMaxUint32].");
368
+
369
+ DEFINE_uint64(
370
+ disposable_entries_delete_delay, 0,
371
+ "Minimum delay in microseconds for the series of Deletes "
372
+ "to be issued. When 0 the insertion of the last disposable entry is "
373
+ "immediately followed by the issuance of the Deletes. "
374
+ "(only compatible with fillanddeleteuniquerandom benchmark).");
375
+
376
+ DEFINE_uint64(disposable_entries_batch_size, 0,
377
+ "Number of consecutively inserted disposable KV entries "
378
+ "that will be deleted after 'delete_delay' microseconds. "
379
+ "A series of Deletes is always issued once all the "
380
+ "disposable KV entries it targets have been inserted "
381
+ "into the DB. When 0 no deletes are issued and a "
382
+ "regular 'filluniquerandom' benchmark occurs. "
383
+ "(only compatible with fillanddeleteuniquerandom benchmark)");
384
+
385
+ DEFINE_int32(disposable_entries_value_size, 64,
386
+ "Size of the values (in bytes) of the entries targeted by "
387
+ "selective deletes. "
388
+ "(only compatible with fillanddeleteuniquerandom benchmark)");
389
+
390
+ DEFINE_uint64(
391
+ persistent_entries_batch_size, 0,
392
+ "Number of KV entries being inserted right before the deletes "
393
+ "targeting the disposable KV entries are issued. These "
394
+ "persistent keys are not targeted by the deletes, and will always "
395
+ "remain valid in the DB. (only compatible with "
396
+ "--benchmarks='fillanddeleteuniquerandom' "
397
+ "and used when--disposable_entries_batch_size is > 0).");
398
+
399
+ DEFINE_int32(persistent_entries_value_size, 64,
400
+ "Size of the values (in bytes) of the entries not targeted by "
401
+ "deletes. (only compatible with "
402
+ "--benchmarks='fillanddeleteuniquerandom' "
403
+ "and used when--disposable_entries_batch_size is > 0).");
404
+
303
405
  DEFINE_double(read_random_exp_range, 0.0,
304
406
  "Read random's key will be generated using distribution of "
305
407
  "num * exp(-r) where r is uniform number from 0 to this value. "
@@ -395,8 +497,6 @@ DEFINE_int32(max_background_compactions,
395
497
  "The maximum number of concurrent background compactions"
396
498
  " that can occur in parallel.");
397
499
 
398
- DEFINE_int32(base_background_compactions, -1, "DEPRECATED");
399
-
400
500
  DEFINE_uint64(subcompactions, 1,
401
501
  "Maximum number of subcompactions to divide L0-L1 compactions "
402
502
  "into.");
@@ -439,6 +539,9 @@ DEFINE_int32(universal_compression_size_percent, -1,
439
539
  DEFINE_bool(universal_allow_trivial_move, false,
440
540
  "Allow trivial move in universal compaction.");
441
541
 
542
+ DEFINE_bool(universal_incremental, false,
543
+ "Enable incremental compactions in universal compaction.");
544
+
442
545
  DEFINE_int64(cache_size, 8 << 20, // 8MB
443
546
  "Number of bytes to use as a cache of uncompressed data");
444
547
 
@@ -455,6 +558,38 @@ DEFINE_double(cache_high_pri_pool_ratio, 0.0,
455
558
  DEFINE_bool(use_clock_cache, false,
456
559
  "Replace default LRU block cache with clock cache.");
457
560
 
561
+ DEFINE_bool(use_compressed_secondary_cache, false,
562
+ "Use the CompressedSecondaryCache as the secondary cache.");
563
+
564
+ DEFINE_int64(compressed_secondary_cache_size, 8 << 20, // 8MB
565
+ "Number of bytes to use as a cache of data");
566
+
567
+ DEFINE_int32(compressed_secondary_cache_numshardbits, 6,
568
+ "Number of shards for the block cache"
569
+ " is 2 ** compressed_secondary_cache_numshardbits."
570
+ " Negative means use default settings."
571
+ " This is applied only if FLAGS_cache_size is non-negative.");
572
+
573
+ DEFINE_double(compressed_secondary_cache_high_pri_pool_ratio, 0.0,
574
+ "Ratio of block cache reserve for high pri blocks. "
575
+ "If > 0.0, we also enable "
576
+ "cache_index_and_filter_blocks_with_high_priority.");
577
+
578
+ DEFINE_string(compressed_secondary_cache_compression_type, "lz4",
579
+ "The compression algorithm to use for large "
580
+ "values stored in CompressedSecondaryCache.");
581
+ static enum ROCKSDB_NAMESPACE::CompressionType
582
+ FLAGS_compressed_secondary_cache_compression_type_e =
583
+ ROCKSDB_NAMESPACE::kLZ4Compression;
584
+
585
+ DEFINE_uint32(
586
+ compressed_secondary_cache_compress_format_version, 2,
587
+ "compress_format_version can have two values: "
588
+ "compress_format_version == 1 -- decompressed size is not included"
589
+ " in the block header."
590
+ "compress_format_version == 2 -- decompressed size is included"
591
+ " in the block header in varint32 format.");
592
+
458
593
  DEFINE_int64(simcache_size, -1,
459
594
  "Number of bytes to use as a simcache of "
460
595
  "uncompressed data. Nagative value disables simcache.");
@@ -532,6 +667,10 @@ DEFINE_bool(block_align,
532
667
  ROCKSDB_NAMESPACE::BlockBasedTableOptions().block_align,
533
668
  "Align data blocks on page size");
534
669
 
670
+ DEFINE_int64(prepopulate_block_cache, 0,
671
+ "Pre-populate hot/warm blocks in block cache. 0 to disable and 1 "
672
+ "to insert during flush");
673
+
535
674
  DEFINE_bool(use_data_block_hash_index, false,
536
675
  "if use kDataBlockBinaryAndHash "
537
676
  "instead of kDataBlockBinarySearch. "
@@ -558,9 +697,6 @@ DEFINE_int32(file_opening_threads,
558
697
  "If open_files is set to -1, this option set the number of "
559
698
  "threads that will be used to open files during DB::Open()");
560
699
 
561
- DEFINE_bool(new_table_reader_for_compaction_inputs, true,
562
- "If true, uses a separate file handle for compaction inputs");
563
-
564
700
  DEFINE_int32(compaction_readahead_size, 0, "Compaction readahead size");
565
701
 
566
702
  DEFINE_int32(log_readahead_size, 0, "WAL and manifest readahead size");
@@ -571,8 +707,9 @@ DEFINE_int32(random_access_max_buffer_size, 1024 * 1024,
571
707
  DEFINE_int32(writable_file_max_buffer_size, 1024 * 1024,
572
708
  "Maximum write buffer for Writable File");
573
709
 
574
- DEFINE_int32(bloom_bits, -1, "Bloom filter bits per key. Negative means"
575
- " use default settings.");
710
+ DEFINE_int32(bloom_bits, -1,
711
+ "Bloom filter bits per key. Negative means use default."
712
+ "Zero disables.");
576
713
 
577
714
  DEFINE_bool(use_ribbon_filter, false, "Use Ribbon instead of Bloom filter");
578
715
 
@@ -584,6 +721,10 @@ DEFINE_bool(memtable_whole_key_filtering, false,
584
721
  DEFINE_bool(memtable_use_huge_page, false,
585
722
  "Try to use huge page in memtables.");
586
723
 
724
+ DEFINE_bool(whole_key_filtering,
725
+ ROCKSDB_NAMESPACE::BlockBasedTableOptions().whole_key_filtering,
726
+ "Use whole keys (in addition to prefixes) in SST bloom filter.");
727
+
587
728
  DEFINE_bool(use_existing_db, false, "If true, do not destroy the existing"
588
729
  " database. If you set this flag and also specify a benchmark that"
589
730
  " wants a fresh database, that benchmark will fail.");
@@ -632,6 +773,10 @@ DEFINE_bool(verify_checksum, true,
632
773
  "Verify checksum for every block read"
633
774
  " from storage");
634
775
 
776
+ DEFINE_int32(checksum_type,
777
+ ROCKSDB_NAMESPACE::BlockBasedTableOptions().checksum,
778
+ "ChecksumType as an int");
779
+
635
780
  DEFINE_bool(statistics, false, "Database statistics");
636
781
  DEFINE_int32(stats_level, ROCKSDB_NAMESPACE::StatsLevel::kExceptDetailedTimers,
637
782
  "stats level for statistics");
@@ -649,6 +794,14 @@ DEFINE_bool(use_fsync, false, "If true, issue fsync instead of fdatasync");
649
794
 
650
795
  DEFINE_bool(disable_wal, false, "If true, do not write WAL for write.");
651
796
 
797
+ DEFINE_bool(manual_wal_flush, false,
798
+ "If true, buffer WAL until buffer is full or a manual FlushWAL().");
799
+
800
+ DEFINE_string(wal_compression, "none",
801
+ "Algorithm to use for WAL compression. none to disable.");
802
+ static enum ROCKSDB_NAMESPACE::CompressionType FLAGS_wal_compression_e =
803
+ ROCKSDB_NAMESPACE::kNoCompression;
804
+
652
805
  DEFINE_string(wal_dir, "", "If not empty, use the given dir for WAL");
653
806
 
654
807
  DEFINE_string(truth_db, "/dev/shm/truth_db/dbbench",
@@ -680,24 +833,23 @@ DEFINE_string(max_bytes_for_level_multiplier_additional, "",
680
833
 
681
834
  DEFINE_int32(level0_stop_writes_trigger,
682
835
  ROCKSDB_NAMESPACE::Options().level0_stop_writes_trigger,
683
- "Number of files in level-0"
684
- " that will trigger put stop.");
836
+ "Number of files in level-0 that will trigger put stop.");
685
837
 
686
838
  DEFINE_int32(level0_slowdown_writes_trigger,
687
839
  ROCKSDB_NAMESPACE::Options().level0_slowdown_writes_trigger,
688
- "Number of files in level-0"
689
- " that will slow down writes.");
840
+ "Number of files in level-0 that will slow down writes.");
690
841
 
691
842
  DEFINE_int32(level0_file_num_compaction_trigger,
692
843
  ROCKSDB_NAMESPACE::Options().level0_file_num_compaction_trigger,
693
- "Number of files in level-0"
694
- " when compactions start");
844
+ "Number of files in level-0 when compactions start.");
695
845
 
696
846
  DEFINE_uint64(periodic_compaction_seconds,
697
847
  ROCKSDB_NAMESPACE::Options().periodic_compaction_seconds,
698
848
  "Files older than this will be picked up for compaction and"
699
849
  " rewritten to the same level");
700
850
 
851
+ DEFINE_uint64(ttl_seconds, ROCKSDB_NAMESPACE::Options().ttl, "Set options.ttl");
852
+
701
853
  static bool ValidateInt32Percent(const char* flagname, int32_t value) {
702
854
  if (value <= 0 || value>=100) {
703
855
  fprintf(stderr, "Invalid value for --%s: %d, 0< pct <100 \n",
@@ -722,11 +874,25 @@ DEFINE_int32(deletepercent, 2, "Percentage of deletes out of reads/writes/"
722
874
  "deletepercent), so deletepercent must be smaller than (100 - "
723
875
  "FLAGS_readwritepercent)");
724
876
 
725
- DEFINE_bool(optimize_filters_for_hits, false,
877
+ DEFINE_bool(optimize_filters_for_hits,
878
+ ROCKSDB_NAMESPACE::Options().optimize_filters_for_hits,
726
879
  "Optimizes bloom filters for workloads for most lookups return "
727
880
  "a value. For now this doesn't create bloom filters for the max "
728
881
  "level of the LSM to reduce metadata that should fit in RAM. ");
729
882
 
883
+ DEFINE_bool(paranoid_checks, ROCKSDB_NAMESPACE::Options().paranoid_checks,
884
+ "RocksDB will aggressively check consistency of the data.");
885
+
886
+ DEFINE_bool(force_consistency_checks,
887
+ ROCKSDB_NAMESPACE::Options().force_consistency_checks,
888
+ "Runs consistency checks on the LSM every time a change is "
889
+ "applied.");
890
+
891
+ DEFINE_bool(check_flush_compaction_key_order,
892
+ ROCKSDB_NAMESPACE::Options().check_flush_compaction_key_order,
893
+ "During flush or compaction, check whether keys inserted to "
894
+ "output files are in order.");
895
+
730
896
  DEFINE_uint64(delete_obsolete_files_period_micros, 0,
731
897
  "Ignored. Left here for backward compatibility");
732
898
 
@@ -739,8 +905,7 @@ DEFINE_int64(writes_per_range_tombstone, 0,
739
905
  DEFINE_int64(range_tombstone_width, 100, "Number of keys in tombstone's range");
740
906
 
741
907
  DEFINE_int64(max_num_range_tombstones, 0,
742
- "Maximum number of range tombstones "
743
- "to insert.");
908
+ "Maximum number of range tombstones to insert.");
744
909
 
745
910
  DEFINE_bool(expand_range_tombstones, false,
746
911
  "Expand range tombstone into sequential regular tombstones.");
@@ -796,55 +961,104 @@ DEFINE_bool(fifo_compaction_allow_compaction, true,
796
961
 
797
962
  DEFINE_uint64(fifo_compaction_ttl, 0, "TTL for the SST Files in seconds.");
798
963
 
799
- // Blob DB Options
800
- DEFINE_bool(use_blob_db, false,
801
- "Open a BlobDB instance. "
802
- "Required for large value benchmark.");
964
+ DEFINE_uint64(fifo_age_for_warm, 0, "age_for_warm for FIFO compaction.");
965
+
966
+ // Stacked BlobDB Options
967
+ DEFINE_bool(use_blob_db, false, "[Stacked BlobDB] Open a BlobDB instance.");
803
968
 
804
969
  DEFINE_bool(
805
970
  blob_db_enable_gc,
806
971
  ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().enable_garbage_collection,
807
- "Enable BlobDB garbage collection.");
972
+ "[Stacked BlobDB] Enable BlobDB garbage collection.");
808
973
 
809
974
  DEFINE_double(
810
975
  blob_db_gc_cutoff,
811
976
  ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().garbage_collection_cutoff,
812
- "Cutoff ratio for BlobDB garbage collection.");
977
+ "[Stacked BlobDB] Cutoff ratio for BlobDB garbage collection.");
813
978
 
814
979
  DEFINE_bool(blob_db_is_fifo,
815
980
  ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().is_fifo,
816
- "Enable FIFO eviction strategy in BlobDB.");
981
+ "[Stacked BlobDB] Enable FIFO eviction strategy in BlobDB.");
817
982
 
818
983
  DEFINE_uint64(blob_db_max_db_size,
819
984
  ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().max_db_size,
820
- "Max size limit of the directory where blob files are stored.");
985
+ "[Stacked BlobDB] Max size limit of the directory where blob "
986
+ "files are stored.");
821
987
 
822
- DEFINE_uint64(
823
- blob_db_max_ttl_range, 0,
824
- "TTL range to generate BlobDB data (in seconds). 0 means no TTL.");
988
+ DEFINE_uint64(blob_db_max_ttl_range, 0,
989
+ "[Stacked BlobDB] TTL range to generate BlobDB data (in "
990
+ "seconds). 0 means no TTL.");
825
991
 
826
- DEFINE_uint64(blob_db_ttl_range_secs,
827
- ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().ttl_range_secs,
828
- "TTL bucket size to use when creating blob files.");
992
+ DEFINE_uint64(
993
+ blob_db_ttl_range_secs,
994
+ ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().ttl_range_secs,
995
+ "[Stacked BlobDB] TTL bucket size to use when creating blob files.");
829
996
 
830
- DEFINE_uint64(blob_db_min_blob_size,
831
- ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().min_blob_size,
832
- "Smallest blob to store in a file. Blobs smaller than this "
833
- "will be inlined with the key in the LSM tree.");
997
+ DEFINE_uint64(
998
+ blob_db_min_blob_size,
999
+ ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().min_blob_size,
1000
+ "[Stacked BlobDB] Smallest blob to store in a file. Blobs "
1001
+ "smaller than this will be inlined with the key in the LSM tree.");
834
1002
 
835
1003
  DEFINE_uint64(blob_db_bytes_per_sync,
836
1004
  ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().bytes_per_sync,
837
- "Bytes to sync blob file at.");
1005
+ "[Stacked BlobDB] Bytes to sync blob file at.");
838
1006
 
839
1007
  DEFINE_uint64(blob_db_file_size,
840
1008
  ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().blob_file_size,
841
- "Target size of each blob file.");
1009
+ "[Stacked BlobDB] Target size of each blob file.");
842
1010
 
843
- DEFINE_string(blob_db_compression_type, "snappy",
844
- "Algorithm to use to compress blob in blob file");
1011
+ DEFINE_string(
1012
+ blob_db_compression_type, "snappy",
1013
+ "[Stacked BlobDB] Algorithm to use to compress blobs in blob files.");
845
1014
  static enum ROCKSDB_NAMESPACE::CompressionType
846
1015
  FLAGS_blob_db_compression_type_e = ROCKSDB_NAMESPACE::kSnappyCompression;
847
1016
 
1017
+ #endif // ROCKSDB_LITE
1018
+
1019
+ // Integrated BlobDB options
1020
+ DEFINE_bool(
1021
+ enable_blob_files,
1022
+ ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions().enable_blob_files,
1023
+ "[Integrated BlobDB] Enable writing large values to separate blob files.");
1024
+
1025
+ DEFINE_uint64(min_blob_size,
1026
+ ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions().min_blob_size,
1027
+ "[Integrated BlobDB] The size of the smallest value to be stored "
1028
+ "separately in a blob file.");
1029
+
1030
+ DEFINE_uint64(blob_file_size,
1031
+ ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions().blob_file_size,
1032
+ "[Integrated BlobDB] The size limit for blob files.");
1033
+
1034
+ DEFINE_string(blob_compression_type, "none",
1035
+ "[Integrated BlobDB] The compression algorithm to use for large "
1036
+ "values stored in blob files.");
1037
+
1038
+ DEFINE_bool(enable_blob_garbage_collection,
1039
+ ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions()
1040
+ .enable_blob_garbage_collection,
1041
+ "[Integrated BlobDB] Enable blob garbage collection.");
1042
+
1043
+ DEFINE_double(blob_garbage_collection_age_cutoff,
1044
+ ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions()
1045
+ .blob_garbage_collection_age_cutoff,
1046
+ "[Integrated BlobDB] The cutoff in terms of blob file age for "
1047
+ "garbage collection.");
1048
+
1049
+ DEFINE_double(blob_garbage_collection_force_threshold,
1050
+ ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions()
1051
+ .blob_garbage_collection_force_threshold,
1052
+ "[Integrated BlobDB] The threshold for the ratio of garbage in "
1053
+ "the oldest blob files for forcing garbage collection.");
1054
+
1055
+ DEFINE_uint64(blob_compaction_readahead_size,
1056
+ ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions()
1057
+ .blob_compaction_readahead_size,
1058
+ "[Integrated BlobDB] Compaction readahead for blob files.");
1059
+
1060
+ #ifndef ROCKSDB_LITE
1061
+
848
1062
  // Secondary DB instance Options
849
1063
  DEFINE_bool(use_secondary_db, false,
850
1064
  "Open a RocksDB secondary instance. A primary instance can be "
@@ -866,10 +1080,12 @@ DEFINE_bool(report_bg_io_stats, false,
866
1080
  DEFINE_bool(use_stderr_info_logger, false,
867
1081
  "Write info logs to stderr instead of to LOG file. ");
868
1082
 
1083
+ #ifndef ROCKSDB_LITE
1084
+
869
1085
  DEFINE_string(trace_file, "", "Trace workload to a file. ");
870
1086
 
871
- DEFINE_int32(trace_replay_fast_forward, 1,
872
- "Fast forward trace replay, must >= 1. ");
1087
+ DEFINE_double(trace_replay_fast_forward, 1.0,
1088
+ "Fast forward trace replay, must > 0.0.");
873
1089
  DEFINE_int32(block_cache_trace_sampling_frequency, 1,
874
1090
  "Block cache trace sampling frequency, termed s. It uses spatial "
875
1091
  "downsampling and samples accesses to one out of s blocks.");
@@ -883,6 +1099,37 @@ DEFINE_string(block_cache_trace_file, "", "Block cache trace file path.");
883
1099
  DEFINE_int32(trace_replay_threads, 1,
884
1100
  "The number of threads to replay, must >=1.");
885
1101
 
1102
+ DEFINE_bool(io_uring_enabled, true,
1103
+ "If true, enable the use of IO uring if the platform supports it");
1104
+ extern "C" bool RocksDbIOUringEnable() { return FLAGS_io_uring_enabled; }
1105
+ #endif // ROCKSDB_LITE
1106
+
1107
+ DEFINE_bool(adaptive_readahead, false,
1108
+ "carry forward internal auto readahead size from one file to next "
1109
+ "file at each level during iteration");
1110
+
1111
+ DEFINE_bool(rate_limit_user_ops, false,
1112
+ "When true use Env::IO_USER priority level to charge internal rate "
1113
+ "limiter for reads associated with user operations.");
1114
+
1115
+ DEFINE_bool(file_checksum, false,
1116
+ "When true use FileChecksumGenCrc32cFactory for "
1117
+ "file_checksum_gen_factory.");
1118
+
1119
+ DEFINE_bool(rate_limit_auto_wal_flush, false,
1120
+ "When true use Env::IO_USER priority level to charge internal rate "
1121
+ "limiter for automatic WAL flush (`Options::manual_wal_flush` == "
1122
+ "false) after the user write operation.");
1123
+
1124
+ DEFINE_bool(async_io, false,
1125
+ "When set true, RocksDB does asynchronous reads for internal auto "
1126
+ "readahead prefetching.");
1127
+
1128
+ DEFINE_bool(reserve_table_reader_memory, false,
1129
+ "A dynamically updating charge to block cache, loosely based on "
1130
+ "the actual memory usage of table reader, will occur to account "
1131
+ "the memory, if block cache available.");
1132
+
886
1133
  static enum ROCKSDB_NAMESPACE::CompressionType StringToCompressionType(
887
1134
  const char* ctype) {
888
1135
  assert(ctype);
@@ -903,9 +1150,10 @@ static enum ROCKSDB_NAMESPACE::CompressionType StringToCompressionType(
903
1150
  return ROCKSDB_NAMESPACE::kXpressCompression;
904
1151
  else if (!strcasecmp(ctype, "zstd"))
905
1152
  return ROCKSDB_NAMESPACE::kZSTD;
906
-
907
- fprintf(stdout, "Cannot parse compression type '%s'\n", ctype);
908
- return ROCKSDB_NAMESPACE::kSnappyCompression; // default value
1153
+ else {
1154
+ fprintf(stderr, "Cannot parse compression type '%s'\n", ctype);
1155
+ exit(1);
1156
+ }
909
1157
  }
910
1158
 
911
1159
  static std::string ColumnFamilyName(size_t i) {
@@ -948,10 +1196,14 @@ DEFINE_int32(min_level_to_compress, -1, "If non-negative, compression starts"
948
1196
  DEFINE_int32(compression_parallel_threads, 1,
949
1197
  "Number of threads for parallel compression.");
950
1198
 
1199
+ DEFINE_uint64(compression_max_dict_buffer_bytes,
1200
+ ROCKSDB_NAMESPACE::CompressionOptions().max_dict_buffer_bytes,
1201
+ "Maximum bytes to buffer to collect samples for dictionary.");
1202
+
951
1203
  static bool ValidateTableCacheNumshardbits(const char* flagname,
952
1204
  int32_t value) {
953
- if (0 >= value || value > 20) {
954
- fprintf(stderr, "Invalid value for --%s: %d, must be 0 < val <= 20\n",
1205
+ if (0 >= value || value >= 20) {
1206
+ fprintf(stderr, "Invalid value for --%s: %d, must be 0 < val < 20\n",
955
1207
  flagname, value);
956
1208
  return false;
957
1209
  }
@@ -961,16 +1213,20 @@ DEFINE_int32(table_cache_numshardbits, 4, "");
961
1213
 
962
1214
  #ifndef ROCKSDB_LITE
963
1215
  DEFINE_string(env_uri, "",
964
- "URI for registry Env lookup. Mutually exclusive"
965
- " with --hdfs and --fs_uri");
1216
+ "URI for registry Env lookup. Mutually exclusive with --fs_uri");
966
1217
  DEFINE_string(fs_uri, "",
967
1218
  "URI for registry Filesystem lookup. Mutually exclusive"
968
- " with --hdfs and --env_uri."
1219
+ " with --env_uri."
969
1220
  " Creates a default environment with the specified filesystem.");
970
1221
  #endif // ROCKSDB_LITE
971
- DEFINE_string(hdfs, "",
972
- "Name of hdfs environment. Mutually exclusive with"
973
- " --env_uri and --fs_uri");
1222
+ DEFINE_string(simulate_hybrid_fs_file, "",
1223
+ "File for Store Metadata for Simulate hybrid FS. Empty means "
1224
+ "disable the feature. Now, if it is set, "
1225
+ "bottommost_temperature is set to kWarm.");
1226
+ DEFINE_int32(simulate_hybrid_hdd_multipliers, 1,
1227
+ "In simulate_hybrid_fs_file or simulate_hdd mode, how many HDDs "
1228
+ "are simulated.");
1229
+ DEFINE_bool(simulate_hdd, false, "Simulate read/write latency on HDD.");
974
1230
 
975
1231
  static std::shared_ptr<ROCKSDB_NAMESPACE::Env> env_guard;
976
1232
 
@@ -985,8 +1241,12 @@ DEFINE_int64(stats_interval_seconds, 0, "Report stats every N seconds. This "
985
1241
  DEFINE_int32(stats_per_interval, 0, "Reports additional stats per interval when"
986
1242
  " this is greater than 0.");
987
1243
 
1244
+ DEFINE_uint64(slow_usecs, 1000000,
1245
+ "A message is printed for operations that "
1246
+ "take at least this many microseconds.");
1247
+
988
1248
  DEFINE_int64(report_interval_seconds, 0,
989
- "If greater than zero, it will write simple stats in CVS format "
1249
+ "If greater than zero, it will write simple stats in CSV format "
990
1250
  "to --report_file every N seconds");
991
1251
 
992
1252
  DEFINE_string(report_file, "report.csv",
@@ -1000,28 +1260,6 @@ DEFINE_int32(thread_status_per_interval, 0,
1000
1260
  DEFINE_int32(perf_level, ROCKSDB_NAMESPACE::PerfLevel::kDisable,
1001
1261
  "Level of perf collection");
1002
1262
 
1003
- #ifndef ROCKSDB_LITE
1004
- static ROCKSDB_NAMESPACE::Env* GetCompositeEnv(
1005
- std::shared_ptr<ROCKSDB_NAMESPACE::FileSystem> fs) {
1006
- static std::shared_ptr<ROCKSDB_NAMESPACE::Env> composite_env =
1007
- ROCKSDB_NAMESPACE::NewCompositeEnv(fs);
1008
- return composite_env.get();
1009
- }
1010
- #endif
1011
-
1012
- static bool ValidateRateLimit(const char* flagname, double value) {
1013
- const double EPSILON = 1e-10;
1014
- if ( value < -EPSILON ) {
1015
- fprintf(stderr, "Invalid value for --%s: %12.6f, must be >= 0.0\n",
1016
- flagname, value);
1017
- return false;
1018
- }
1019
- return true;
1020
- }
1021
- DEFINE_double(soft_rate_limit, 0.0, "DEPRECATED");
1022
-
1023
- DEFINE_double(hard_rate_limit, 0.0, "DEPRECATED");
1024
-
1025
1263
  DEFINE_uint64(soft_pending_compaction_bytes_limit, 64ull * 1024 * 1024 * 1024,
1026
1264
  "Slowdown writes if pending compaction bytes exceed this number");
1027
1265
 
@@ -1043,6 +1281,10 @@ DEFINE_bool(
1043
1281
  DEFINE_bool(allow_concurrent_memtable_write, true,
1044
1282
  "Allow multi-writers to update mem tables in parallel.");
1045
1283
 
1284
+ DEFINE_double(experimental_mempurge_threshold, 0.0,
1285
+ "Maximum useful payload ratio estimate that triggers a mempurge "
1286
+ "(memtable garbage collection).");
1287
+
1046
1288
  DEFINE_bool(inplace_update_support,
1047
1289
  ROCKSDB_NAMESPACE::Options().inplace_update_support,
1048
1290
  "Support in-place memtable update for smaller or same-size values");
@@ -1062,12 +1304,11 @@ DEFINE_uint64(write_thread_slow_yield_usec, 3,
1062
1304
  "The threshold at which a slow yield is considered a signal that "
1063
1305
  "other processes or threads want the core.");
1064
1306
 
1065
- DEFINE_int32(rate_limit_delay_max_milliseconds, 1000,
1066
- "When hard_rate_limit is set then this is the max time a put will"
1067
- " be stalled.");
1068
-
1069
1307
  DEFINE_uint64(rate_limiter_bytes_per_sec, 0, "Set options.rate_limiter value.");
1070
1308
 
1309
+ DEFINE_int64(rate_limiter_refill_period_us, 100 * 1000,
1310
+ "Set refill period on rate limiter.");
1311
+
1071
1312
  DEFINE_bool(rate_limiter_auto_tuned, false,
1072
1313
  "Enable dynamic adjustment of rate limit according to demand for "
1073
1314
  "background I/O");
@@ -1114,30 +1355,31 @@ DEFINE_double(keyrange_dist_d, 0.0,
1114
1355
  "f(x)=a*exp(b*x)+c*exp(d*x)");
1115
1356
  DEFINE_int64(keyrange_num, 1,
1116
1357
  "The number of key ranges that are in the same prefix "
1117
- "group, each prefix range will have its key access "
1118
- "distribution");
1358
+ "group, each prefix range will have its key access distribution");
1119
1359
  DEFINE_double(key_dist_a, 0.0,
1120
- "The parameter 'a' of key access distribution model "
1121
- "f(x)=a*x^b");
1360
+ "The parameter 'a' of key access distribution model f(x)=a*x^b");
1122
1361
  DEFINE_double(key_dist_b, 0.0,
1123
- "The parameter 'b' of key access distribution model "
1124
- "f(x)=a*x^b");
1362
+ "The parameter 'b' of key access distribution model f(x)=a*x^b");
1125
1363
  DEFINE_double(value_theta, 0.0,
1126
1364
  "The parameter 'theta' of Generized Pareto Distribution "
1127
1365
  "f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)");
1128
- DEFINE_double(value_k, 0.0,
1366
+ // Use reasonable defaults based on the mixgraph paper
1367
+ DEFINE_double(value_k, 0.2615,
1129
1368
  "The parameter 'k' of Generized Pareto Distribution "
1130
1369
  "f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)");
1131
- DEFINE_double(value_sigma, 0.0,
1370
+ // Use reasonable defaults based on the mixgraph paper
1371
+ DEFINE_double(value_sigma, 25.45,
1132
1372
  "The parameter 'theta' of Generized Pareto Distribution "
1133
1373
  "f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)");
1134
1374
  DEFINE_double(iter_theta, 0.0,
1135
1375
  "The parameter 'theta' of Generized Pareto Distribution "
1136
1376
  "f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)");
1137
- DEFINE_double(iter_k, 0.0,
1377
+ // Use reasonable defaults based on the mixgraph paper
1378
+ DEFINE_double(iter_k, 2.517,
1138
1379
  "The parameter 'k' of Generized Pareto Distribution "
1139
1380
  "f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)");
1140
- DEFINE_double(iter_sigma, 0.0,
1381
+ // Use reasonable defaults based on the mixgraph paper
1382
+ DEFINE_double(iter_sigma, 14.236,
1141
1383
  "The parameter 'sigma' of Generized Pareto Distribution "
1142
1384
  "f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)");
1143
1385
  DEFINE_double(mix_get_ratio, 1.0,
@@ -1147,8 +1389,6 @@ DEFINE_double(mix_put_ratio, 0.0,
1147
1389
  DEFINE_double(mix_seek_ratio, 0.0,
1148
1390
  "The ratio of Seek queries of mix_graph workload");
1149
1391
  DEFINE_int64(mix_max_scan_len, 10000, "The max scan length of Iterator");
1150
- DEFINE_int64(mix_ave_kv_size, 512,
1151
- "The average key-value size of this workload");
1152
1392
  DEFINE_int64(mix_max_value_size, 1024, "The max value size of this workload");
1153
1393
  DEFINE_double(
1154
1394
  sine_mix_rate_noise, 0.0,
@@ -1296,34 +1536,13 @@ DEFINE_bool(persist_stats_to_disk,
1296
1536
  DEFINE_uint64(stats_history_buffer_size,
1297
1537
  ROCKSDB_NAMESPACE::Options().stats_history_buffer_size,
1298
1538
  "Max number of stats snapshots to keep in memory");
1539
+ DEFINE_bool(avoid_flush_during_recovery,
1540
+ ROCKSDB_NAMESPACE::Options().avoid_flush_during_recovery,
1541
+ "If true, avoids flushing the recovered WAL data where possible.");
1299
1542
  DEFINE_int64(multiread_stride, 0,
1300
1543
  "Stride length for the keys in a MultiGet batch");
1301
1544
  DEFINE_bool(multiread_batched, false, "Use the new MultiGet API");
1302
1545
 
1303
- enum RepFactory {
1304
- kSkipList,
1305
- kPrefixHash,
1306
- kVectorRep,
1307
- kHashLinkedList,
1308
- };
1309
-
1310
- static enum RepFactory StringToRepFactory(const char* ctype) {
1311
- assert(ctype);
1312
-
1313
- if (!strcasecmp(ctype, "skip_list"))
1314
- return kSkipList;
1315
- else if (!strcasecmp(ctype, "prefix_hash"))
1316
- return kPrefixHash;
1317
- else if (!strcasecmp(ctype, "vector"))
1318
- return kVectorRep;
1319
- else if (!strcasecmp(ctype, "hash_linkedlist"))
1320
- return kHashLinkedList;
1321
-
1322
- fprintf(stdout, "Cannot parse memreptable %s\n", ctype);
1323
- return kSkipList;
1324
- }
1325
-
1326
- static enum RepFactory FLAGS_rep_factory;
1327
1546
  DEFINE_string(memtablerep, "skip_list", "");
1328
1547
  DEFINE_int64(hash_bucket_count, 1024 * 1024, "hash bucket count");
1329
1548
  DEFINE_bool(use_plain_table, false, "if use plain table "
@@ -1345,17 +1564,18 @@ DEFINE_int32(skip_list_lookahead, 0, "Used with skip_list memtablerep; try "
1345
1564
  "position");
1346
1565
  DEFINE_bool(report_file_operations, false, "if report number of file "
1347
1566
  "operations");
1567
+ DEFINE_bool(report_open_timing, false, "if report open timing");
1348
1568
  DEFINE_int32(readahead_size, 0, "Iterator readahead size");
1349
1569
 
1350
1570
  DEFINE_bool(read_with_latest_user_timestamp, true,
1351
1571
  "If true, always use the current latest timestamp for read. If "
1352
1572
  "false, choose a random timestamp from the past.");
1353
1573
 
1354
- static const bool FLAGS_soft_rate_limit_dummy __attribute__((__unused__)) =
1355
- RegisterFlagValidator(&FLAGS_soft_rate_limit, &ValidateRateLimit);
1356
-
1357
- static const bool FLAGS_hard_rate_limit_dummy __attribute__((__unused__)) =
1358
- RegisterFlagValidator(&FLAGS_hard_rate_limit, &ValidateRateLimit);
1574
+ #ifndef ROCKSDB_LITE
1575
+ DEFINE_string(secondary_cache_uri, "",
1576
+ "Full URI for creating a custom secondary cache object");
1577
+ static class std::shared_ptr<ROCKSDB_NAMESPACE::SecondaryCache> secondary_cache;
1578
+ #endif // ROCKSDB_LITE
1359
1579
 
1360
1580
  static const bool FLAGS_prefix_size_dummy __attribute__((__unused__)) =
1361
1581
  RegisterFlagValidator(&FLAGS_prefix_size, &ValidatePrefixSize);
@@ -1380,131 +1600,32 @@ static const bool FLAGS_table_cache_numshardbits_dummy __attribute__((__unused__
1380
1600
  &ValidateTableCacheNumshardbits);
1381
1601
 
1382
1602
  namespace ROCKSDB_NAMESPACE {
1383
-
1384
1603
  namespace {
1385
- struct ReportFileOpCounters {
1386
- std::atomic<int> open_counter_;
1387
- std::atomic<int> read_counter_;
1388
- std::atomic<int> append_counter_;
1389
- std::atomic<uint64_t> bytes_read_;
1390
- std::atomic<uint64_t> bytes_written_;
1391
- };
1392
-
1393
- // A special Env to records and report file operations in db_bench
1394
- class ReportFileOpEnv : public EnvWrapper {
1395
- public:
1396
- explicit ReportFileOpEnv(Env* base) : EnvWrapper(base) { reset(); }
1397
-
1398
- void reset() {
1399
- counters_.open_counter_ = 0;
1400
- counters_.read_counter_ = 0;
1401
- counters_.append_counter_ = 0;
1402
- counters_.bytes_read_ = 0;
1403
- counters_.bytes_written_ = 0;
1404
- }
1405
-
1406
- Status NewSequentialFile(const std::string& f,
1407
- std::unique_ptr<SequentialFile>* r,
1408
- const EnvOptions& soptions) override {
1409
- class CountingFile : public SequentialFile {
1410
- private:
1411
- std::unique_ptr<SequentialFile> target_;
1412
- ReportFileOpCounters* counters_;
1413
-
1414
- public:
1415
- CountingFile(std::unique_ptr<SequentialFile>&& target,
1416
- ReportFileOpCounters* counters)
1417
- : target_(std::move(target)), counters_(counters) {}
1418
-
1419
- Status Read(size_t n, Slice* result, char* scratch) override {
1420
- counters_->read_counter_.fetch_add(1, std::memory_order_relaxed);
1421
- Status rv = target_->Read(n, result, scratch);
1422
- counters_->bytes_read_.fetch_add(result->size(),
1423
- std::memory_order_relaxed);
1424
- return rv;
1425
- }
1426
-
1427
- Status Skip(uint64_t n) override { return target_->Skip(n); }
1428
- };
1429
-
1430
- Status s = target()->NewSequentialFile(f, r, soptions);
1431
- if (s.ok()) {
1432
- counters()->open_counter_.fetch_add(1, std::memory_order_relaxed);
1433
- r->reset(new CountingFile(std::move(*r), counters()));
1434
- }
1435
- return s;
1436
- }
1437
-
1438
- Status NewRandomAccessFile(const std::string& f,
1439
- std::unique_ptr<RandomAccessFile>* r,
1440
- const EnvOptions& soptions) override {
1441
- class CountingFile : public RandomAccessFile {
1442
- private:
1443
- std::unique_ptr<RandomAccessFile> target_;
1444
- ReportFileOpCounters* counters_;
1445
-
1446
- public:
1447
- CountingFile(std::unique_ptr<RandomAccessFile>&& target,
1448
- ReportFileOpCounters* counters)
1449
- : target_(std::move(target)), counters_(counters) {}
1450
- Status Read(uint64_t offset, size_t n, Slice* result,
1451
- char* scratch) const override {
1452
- counters_->read_counter_.fetch_add(1, std::memory_order_relaxed);
1453
- Status rv = target_->Read(offset, n, result, scratch);
1454
- counters_->bytes_read_.fetch_add(result->size(),
1455
- std::memory_order_relaxed);
1456
- return rv;
1457
- }
1458
- };
1459
-
1460
- Status s = target()->NewRandomAccessFile(f, r, soptions);
1461
- if (s.ok()) {
1462
- counters()->open_counter_.fetch_add(1, std::memory_order_relaxed);
1463
- r->reset(new CountingFile(std::move(*r), counters()));
1464
- }
1465
- return s;
1466
- }
1467
-
1468
- Status NewWritableFile(const std::string& f, std::unique_ptr<WritableFile>* r,
1469
- const EnvOptions& soptions) override {
1470
- class CountingFile : public WritableFile {
1471
- private:
1472
- std::unique_ptr<WritableFile> target_;
1473
- ReportFileOpCounters* counters_;
1474
-
1475
- public:
1476
- CountingFile(std::unique_ptr<WritableFile>&& target,
1477
- ReportFileOpCounters* counters)
1478
- : target_(std::move(target)), counters_(counters) {}
1479
-
1480
- Status Append(const Slice& data) override {
1481
- counters_->append_counter_.fetch_add(1, std::memory_order_relaxed);
1482
- Status rv = target_->Append(data);
1483
- counters_->bytes_written_.fetch_add(data.size(),
1484
- std::memory_order_relaxed);
1485
- return rv;
1486
- }
1487
-
1488
- Status Truncate(uint64_t size) override { return target_->Truncate(size); }
1489
- Status Close() override { return target_->Close(); }
1490
- Status Flush() override { return target_->Flush(); }
1491
- Status Sync() override { return target_->Sync(); }
1492
- };
1493
-
1494
- Status s = target()->NewWritableFile(f, r, soptions);
1604
+ static Status CreateMemTableRepFactory(
1605
+ const ConfigOptions& config_options,
1606
+ std::shared_ptr<MemTableRepFactory>* factory) {
1607
+ Status s;
1608
+ if (!strcasecmp(FLAGS_memtablerep.c_str(), SkipListFactory::kNickName())) {
1609
+ factory->reset(new SkipListFactory(FLAGS_skip_list_lookahead));
1610
+ #ifndef ROCKSDB_LITE
1611
+ } else if (!strcasecmp(FLAGS_memtablerep.c_str(), "prefix_hash")) {
1612
+ factory->reset(NewHashSkipListRepFactory(FLAGS_hash_bucket_count));
1613
+ } else if (!strcasecmp(FLAGS_memtablerep.c_str(),
1614
+ VectorRepFactory::kNickName())) {
1615
+ factory->reset(new VectorRepFactory());
1616
+ } else if (!strcasecmp(FLAGS_memtablerep.c_str(), "hash_linkedlist")) {
1617
+ factory->reset(NewHashLinkListRepFactory(FLAGS_hash_bucket_count));
1618
+ #endif // ROCKSDB_LITE
1619
+ } else {
1620
+ std::unique_ptr<MemTableRepFactory> unique;
1621
+ s = MemTableRepFactory::CreateFromString(config_options, FLAGS_memtablerep,
1622
+ &unique);
1495
1623
  if (s.ok()) {
1496
- counters()->open_counter_.fetch_add(1, std::memory_order_relaxed);
1497
- r->reset(new CountingFile(std::move(*r), counters()));
1624
+ factory->reset(unique.release());
1498
1625
  }
1499
- return s;
1500
1626
  }
1501
-
1502
- // getter
1503
- ReportFileOpCounters* counters() { return &counters_; }
1504
-
1505
- private:
1506
- ReportFileOpCounters counters_;
1507
- };
1627
+ return s;
1628
+ }
1508
1629
 
1509
1630
  } // namespace
1510
1631
 
@@ -1527,7 +1648,7 @@ static enum DistributionType StringToDistributionType(const char* ctype) {
1527
1648
  return kNormal;
1528
1649
 
1529
1650
  fprintf(stdout, "Cannot parse distribution type '%s'\n", ctype);
1530
- return kFixed; // default value
1651
+ exit(1);
1531
1652
  }
1532
1653
 
1533
1654
  class BaseDistribution {
@@ -1766,7 +1887,7 @@ struct DBWithColumnFamilies {
1766
1887
  }
1767
1888
  };
1768
1889
 
1769
- // a class that reports stats to CSV file
1890
+ // A class that reports stats to CSV file.
1770
1891
  class ReporterAgent {
1771
1892
  public:
1772
1893
  ReporterAgent(Env* env, const std::string& fname,
@@ -1809,7 +1930,8 @@ class ReporterAgent {
1809
1930
  private:
1810
1931
  std::string Header() const { return "secs_elapsed,interval_qps"; }
1811
1932
  void SleepAndReport() {
1812
- auto time_started = env_->NowMicros();
1933
+ auto* clock = env_->GetSystemClock().get();
1934
+ auto time_started = clock->NowMicros();
1813
1935
  while (true) {
1814
1936
  {
1815
1937
  std::unique_lock<std::mutex> lk(mutex_);
@@ -1824,7 +1946,7 @@ class ReporterAgent {
1824
1946
  auto total_ops_done_snapshot = total_ops_done_.load();
1825
1947
  // round the seconds elapsed
1826
1948
  auto secs_elapsed =
1827
- (env_->NowMicros() - time_started + kMicrosInSecond / 2) /
1949
+ (clock->NowMicros() - time_started + kMicrosInSecond / 2) /
1828
1950
  kMicrosInSecond;
1829
1951
  std::string report = ToString(secs_elapsed) + "," +
1830
1952
  ToString(total_ops_done_snapshot - last_report_) +
@@ -1887,6 +2009,7 @@ static std::unordered_map<OperationType, std::string, std::hash<unsigned char>>
1887
2009
  class CombinedStats;
1888
2010
  class Stats {
1889
2011
  private:
2012
+ SystemClock* clock_;
1890
2013
  int id_;
1891
2014
  uint64_t start_ = 0;
1892
2015
  uint64_t sine_interval_;
@@ -1906,7 +2029,7 @@ class Stats {
1906
2029
  friend class CombinedStats;
1907
2030
 
1908
2031
  public:
1909
- Stats() { Start(-1); }
2032
+ Stats() : clock_(FLAGS_env->GetSystemClock().get()) { Start(-1); }
1910
2033
 
1911
2034
  void SetReporterAgent(ReporterAgent* reporter_agent) {
1912
2035
  reporter_agent_ = reporter_agent;
@@ -1921,8 +2044,8 @@ class Stats {
1921
2044
  last_report_done_ = 0;
1922
2045
  bytes_ = 0;
1923
2046
  seconds_ = 0;
1924
- start_ = FLAGS_env->NowMicros();
1925
- sine_interval_ = FLAGS_env->NowMicros();
2047
+ start_ = clock_->NowMicros();
2048
+ sine_interval_ = clock_->NowMicros();
1926
2049
  finish_ = start_;
1927
2050
  last_report_finish_ = start_;
1928
2051
  message_.clear();
@@ -1949,12 +2072,12 @@ class Stats {
1949
2072
  if (other.start_ < start_) start_ = other.start_;
1950
2073
  if (other.finish_ > finish_) finish_ = other.finish_;
1951
2074
 
1952
- // Just keep the messages from one thread
2075
+ // Just keep the messages from one thread.
1953
2076
  if (message_.empty()) message_ = other.message_;
1954
2077
  }
1955
2078
 
1956
2079
  void Stop() {
1957
- finish_ = FLAGS_env->NowMicros();
2080
+ finish_ = clock_->NowMicros();
1958
2081
  seconds_ = (finish_ - start_) * 1e-6;
1959
2082
  }
1960
2083
 
@@ -1974,7 +2097,7 @@ class Stats {
1974
2097
  "ElapsedTime", "Stage", "State", "OperationProperties");
1975
2098
 
1976
2099
  int64_t current_time = 0;
1977
- FLAGS_env->GetCurrentTime(&current_time);
2100
+ clock_->GetCurrentTime(&current_time).PermitUncheckedError();
1978
2101
  for (auto ts : thread_list) {
1979
2102
  fprintf(stderr, "%18" PRIu64 " %10s %12s %20s %13s %45s %12s",
1980
2103
  ts.thread_id,
@@ -1995,9 +2118,7 @@ class Stats {
1995
2118
  }
1996
2119
  }
1997
2120
 
1998
- void ResetSineInterval() {
1999
- sine_interval_ = FLAGS_env->NowMicros();
2000
- }
2121
+ void ResetSineInterval() { sine_interval_ = clock_->NowMicros(); }
2001
2122
 
2002
2123
  uint64_t GetSineInterval() {
2003
2124
  return sine_interval_;
@@ -2008,8 +2129,8 @@ class Stats {
2008
2129
  }
2009
2130
 
2010
2131
  void ResetLastOpTime() {
2011
- // Set to now to avoid latency from calls to SleepForMicroseconds
2012
- last_op_finish_ = FLAGS_env->NowMicros();
2132
+ // Set to now to avoid latency from calls to SleepForMicroseconds.
2133
+ last_op_finish_ = clock_->NowMicros();
2013
2134
  }
2014
2135
 
2015
2136
  void FinishedOps(DBWithColumnFamilies* db_with_cfh, DB* db, int64_t num_ops,
@@ -2018,7 +2139,7 @@ class Stats {
2018
2139
  reporter_agent_->ReportFinishedOps(num_ops);
2019
2140
  }
2020
2141
  if (FLAGS_histogram) {
2021
- uint64_t now = FLAGS_env->NowMicros();
2142
+ uint64_t now = clock_->NowMicros();
2022
2143
  uint64_t micros = now - last_op_finish_;
2023
2144
 
2024
2145
  if (hist_.find(op_type) == hist_.end())
@@ -2028,7 +2149,7 @@ class Stats {
2028
2149
  }
2029
2150
  hist_[op_type]->Add(micros);
2030
2151
 
2031
- if (micros > 20000 && !FLAGS_stats_interval) {
2152
+ if (micros >= FLAGS_slow_usecs && !FLAGS_stats_interval) {
2032
2153
  fprintf(stderr, "long op: %" PRIu64 " micros%30s\r", micros, "");
2033
2154
  fflush(stderr);
2034
2155
  }
@@ -2047,7 +2168,7 @@ class Stats {
2047
2168
  else next_report_ += 100000;
2048
2169
  fprintf(stderr, "... finished %" PRIu64 " ops%30s\r", done_, "");
2049
2170
  } else {
2050
- uint64_t now = FLAGS_env->NowMicros();
2171
+ uint64_t now = clock_->NowMicros();
2051
2172
  int64_t usecs_since_last = now - last_report_finish_;
2052
2173
 
2053
2174
  // Determine whether to print status where interval is either
@@ -2055,19 +2176,17 @@ class Stats {
2055
2176
 
2056
2177
  if (FLAGS_stats_interval_seconds &&
2057
2178
  usecs_since_last < (FLAGS_stats_interval_seconds * 1000000)) {
2058
- // Don't check again for this many operations
2179
+ // Don't check again for this many operations.
2059
2180
  next_report_ += FLAGS_stats_interval;
2060
2181
 
2061
2182
  } else {
2062
-
2063
2183
  fprintf(stderr,
2064
- "%s ... thread %d: (%" PRIu64 ",%" PRIu64 ") ops and "
2184
+ "%s ... thread %d: (%" PRIu64 ",%" PRIu64
2185
+ ") ops and "
2065
2186
  "(%.1f,%.1f) ops/second in (%.6f,%.6f) seconds\n",
2066
- FLAGS_env->TimeToString(now/1000000).c_str(),
2067
- id_,
2187
+ clock_->TimeToString(now / 1000000).c_str(), id_,
2068
2188
  done_ - last_report_done_, done_,
2069
- (done_ - last_report_done_) /
2070
- (usecs_since_last / 1000000.0),
2189
+ (done_ - last_report_done_) / (usecs_since_last / 1000000.0),
2071
2190
  done_ / ((now - start_) / 1000000.0),
2072
2191
  (now - last_report_finish_) / 1000000.0,
2073
2192
  (now - start_) / 1000000.0);
@@ -2097,7 +2216,13 @@ class Stats {
2097
2216
  }
2098
2217
  } else if (db) {
2099
2218
  if (db->GetProperty("rocksdb.stats", &stats)) {
2100
- fprintf(stderr, "%s\n", stats.c_str());
2219
+ fprintf(stderr, "%s", stats.c_str());
2220
+ }
2221
+ if (db->GetProperty("rocksdb.num-running-compactions", &stats)) {
2222
+ fprintf(stderr, "num-running-compactions: %s\n", stats.c_str());
2223
+ }
2224
+ if (db->GetProperty("rocksdb.num-running-flushes", &stats)) {
2225
+ fprintf(stderr, "num-running-flushes: %s\n\n", stats.c_str());
2101
2226
  }
2102
2227
  if (FLAGS_show_table_properties) {
2103
2228
  for (int level = 0; level < FLAGS_num_levels; ++level) {
@@ -2163,19 +2288,11 @@ class Stats {
2163
2288
  }
2164
2289
  }
2165
2290
  if (FLAGS_report_file_operations) {
2166
- ReportFileOpEnv* env = static_cast<ReportFileOpEnv*>(FLAGS_env);
2167
- ReportFileOpCounters* counters = env->counters();
2168
- fprintf(stdout, "Num files opened: %d\n",
2169
- counters->open_counter_.load(std::memory_order_relaxed));
2170
- fprintf(stdout, "Num Read(): %d\n",
2171
- counters->read_counter_.load(std::memory_order_relaxed));
2172
- fprintf(stdout, "Num Append(): %d\n",
2173
- counters->append_counter_.load(std::memory_order_relaxed));
2174
- fprintf(stdout, "Num bytes read: %" PRIu64 "\n",
2175
- counters->bytes_read_.load(std::memory_order_relaxed));
2176
- fprintf(stdout, "Num bytes written: %" PRIu64 "\n",
2177
- counters->bytes_written_.load(std::memory_order_relaxed));
2178
- env->reset();
2291
+ auto* counted_fs =
2292
+ FLAGS_env->GetFileSystem()->CheckedCast<CountedFileSystem>();
2293
+ assert(counted_fs);
2294
+ fprintf(stdout, "%s", counted_fs->PrintCounters().c_str());
2295
+ counted_fs->ResetCounters();
2179
2296
  }
2180
2297
  fflush(stdout);
2181
2298
  }
@@ -2308,8 +2425,8 @@ struct ThreadState {
2308
2425
  Stats stats;
2309
2426
  SharedState* shared;
2310
2427
 
2311
- explicit ThreadState(int index)
2312
- : tid(index), rand((FLAGS_seed ? FLAGS_seed : 1000) + index) {}
2428
+ explicit ThreadState(int index, int my_seed)
2429
+ : tid(index), rand(seed_base + my_seed) {}
2313
2430
  };
2314
2431
 
2315
2432
  class Duration {
@@ -2354,7 +2471,6 @@ class Benchmark {
2354
2471
  private:
2355
2472
  std::shared_ptr<Cache> cache_;
2356
2473
  std::shared_ptr<Cache> compressed_cache_;
2357
- std::shared_ptr<const FilterPolicy> filter_policy_;
2358
2474
  const SliceTransform* prefix_extractor_;
2359
2475
  DBWithColumnFamilies db_;
2360
2476
  std::vector<DBWithColumnFamilies> multi_dbs_;
@@ -2362,12 +2478,14 @@ class Benchmark {
2362
2478
  int key_size_;
2363
2479
  int user_timestamp_size_;
2364
2480
  int prefix_size_;
2481
+ int total_thread_count_;
2365
2482
  int64_t keys_per_prefix_;
2366
2483
  int64_t entries_per_batch_;
2367
2484
  int64_t writes_before_delete_range_;
2368
2485
  int64_t writes_per_range_tombstone_;
2369
2486
  int64_t range_tombstone_width_;
2370
2487
  int64_t max_num_range_tombstones_;
2488
+ ReadOptions read_options_;
2371
2489
  WriteOptions write_options_;
2372
2490
  Options open_options_; // keep options around to properly destroy db later
2373
2491
  #ifndef ROCKSDB_LITE
@@ -2381,7 +2499,8 @@ class Benchmark {
2381
2499
  int64_t readwrites_;
2382
2500
  int64_t merge_keys_;
2383
2501
  bool report_file_operations_;
2384
- bool use_blob_db_;
2502
+ bool use_blob_db_; // Stacked BlobDB
2503
+ bool read_operands_; // read via GetMergeOperands()
2385
2504
  std::vector<std::string> keys_;
2386
2505
 
2387
2506
  class ErrorHandlerListener : public EventListener {
@@ -2395,6 +2514,9 @@ class Benchmark {
2395
2514
 
2396
2515
  ~ErrorHandlerListener() override {}
2397
2516
 
2517
+ const char* Name() const override { return kClassName(); }
2518
+ static const char* kClassName() { return "ErrorHandlerListener"; }
2519
+
2398
2520
  void OnErrorRecoveryBegin(BackgroundErrorReason /*reason*/,
2399
2521
  Status /*bg_error*/,
2400
2522
  bool* auto_recovery) override {
@@ -2454,7 +2576,7 @@ class Benchmark {
2454
2576
  compressed);
2455
2577
  }
2456
2578
 
2457
- void PrintHeader() {
2579
+ void PrintHeader(const Options& options) {
2458
2580
  PrintEnvironment();
2459
2581
  fprintf(stdout,
2460
2582
  "Keys: %d bytes each (+ %d bytes user-defined timestamp)\n",
@@ -2504,20 +2626,9 @@ class Benchmark {
2504
2626
  fprintf(stdout, "Compression: %s\n", compression.c_str());
2505
2627
  fprintf(stdout, "Compression sampling rate: %" PRId64 "\n",
2506
2628
  FLAGS_sample_for_compression);
2507
-
2508
- switch (FLAGS_rep_factory) {
2509
- case kPrefixHash:
2510
- fprintf(stdout, "Memtablerep: prefix_hash\n");
2511
- break;
2512
- case kSkipList:
2513
- fprintf(stdout, "Memtablerep: skip_list\n");
2514
- break;
2515
- case kVectorRep:
2516
- fprintf(stdout, "Memtablerep: vector\n");
2517
- break;
2518
- case kHashLinkedList:
2519
- fprintf(stdout, "Memtablerep: hash_linkedlist\n");
2520
- break;
2629
+ if (options.memtable_factory != nullptr) {
2630
+ fprintf(stdout, "Memtablerep: %s\n",
2631
+ options.memtable_factory->GetId().c_str());
2521
2632
  }
2522
2633
  fprintf(stdout, "Perf Level: %d\n", FLAGS_perf_level);
2523
2634
 
@@ -2576,7 +2687,7 @@ class Benchmark {
2576
2687
  fprintf(stderr, "RocksDB: version %d.%d\n",
2577
2688
  kMajorVersion, kMinorVersion);
2578
2689
 
2579
- #if defined(__linux)
2690
+ #if defined(__linux) || defined(__APPLE__) || defined(__FreeBSD__)
2580
2691
  time_t now = time(nullptr);
2581
2692
  char buf[52];
2582
2693
  // Lint complains about ctime() usage, so replace it with ctime_r(). The
@@ -2584,6 +2695,7 @@ class Benchmark {
2584
2695
  fprintf(stderr, "Date: %s",
2585
2696
  ctime_r(&now, buf)); // ctime_r() adds newline
2586
2697
 
2698
+ #if defined(__linux)
2587
2699
  FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
2588
2700
  if (cpuinfo != nullptr) {
2589
2701
  char line[1000];
@@ -2608,6 +2720,45 @@ class Benchmark {
2608
2720
  fprintf(stderr, "CPU: %d * %s\n", num_cpus, cpu_type.c_str());
2609
2721
  fprintf(stderr, "CPUCache: %s\n", cache_size.c_str());
2610
2722
  }
2723
+ #elif defined(__APPLE__)
2724
+ struct host_basic_info h;
2725
+ size_t hlen = HOST_BASIC_INFO_COUNT;
2726
+ if (host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&h,
2727
+ (uint32_t*)&hlen) == KERN_SUCCESS) {
2728
+ std::string cpu_type;
2729
+ std::string cache_size;
2730
+ size_t hcache_size;
2731
+ hlen = sizeof(hcache_size);
2732
+ if (sysctlbyname("hw.cachelinesize", &hcache_size, &hlen, NULL, 0) == 0) {
2733
+ cache_size = std::to_string(hcache_size);
2734
+ }
2735
+ switch (h.cpu_type) {
2736
+ case CPU_TYPE_X86_64:
2737
+ cpu_type = "x86_64";
2738
+ break;
2739
+ case CPU_TYPE_ARM64:
2740
+ cpu_type = "arm64";
2741
+ break;
2742
+ default:
2743
+ break;
2744
+ }
2745
+ fprintf(stderr, "CPU: %d * %s\n", h.max_cpus, cpu_type.c_str());
2746
+ fprintf(stderr, "CPUCache: %s\n", cache_size.c_str());
2747
+ }
2748
+ #elif defined(__FreeBSD__)
2749
+ int ncpus;
2750
+ size_t len = sizeof(ncpus);
2751
+ int mib[2] = {CTL_HW, HW_NCPU};
2752
+ if (sysctl(mib, 2, &ncpus, &len, nullptr, 0) == 0) {
2753
+ char cpu_type[16];
2754
+ len = sizeof(cpu_type) - 1;
2755
+ mib[1] = HW_MACHINE;
2756
+ if (sysctl(mib, 2, cpu_type, &len, nullptr, 0) == 0) cpu_type[len] = 0;
2757
+
2758
+ fprintf(stderr, "CPU: %d * %s\n", ncpus, cpu_type);
2759
+ // no programmatic way to get the cache line size except on PPC
2760
+ }
2761
+ #endif
2611
2762
  #endif
2612
2763
  }
2613
2764
 
@@ -2668,22 +2819,54 @@ class Benchmark {
2668
2819
  }
2669
2820
  return cache;
2670
2821
  } else {
2671
- if (FLAGS_use_cache_memkind_kmem_allocator) {
2822
+ LRUCacheOptions opts(
2823
+ static_cast<size_t>(capacity), FLAGS_cache_numshardbits,
2824
+ false /*strict_capacity_limit*/, FLAGS_cache_high_pri_pool_ratio,
2672
2825
  #ifdef MEMKIND
2673
- return NewLRUCache(
2674
- static_cast<size_t>(capacity), FLAGS_cache_numshardbits,
2675
- false /*strict_capacity_limit*/, FLAGS_cache_high_pri_pool_ratio,
2676
- std::make_shared<MemkindKmemAllocator>());
2677
-
2826
+ FLAGS_use_cache_memkind_kmem_allocator
2827
+ ? std::make_shared<MemkindKmemAllocator>()
2828
+ : nullptr
2678
2829
  #else
2830
+ nullptr
2831
+ #endif
2832
+ );
2833
+ if (FLAGS_use_cache_memkind_kmem_allocator) {
2834
+ #ifndef MEMKIND
2679
2835
  fprintf(stderr, "Memkind library is not linked with the binary.");
2680
2836
  exit(1);
2681
2837
  #endif
2682
- } else {
2683
- return NewLRUCache(
2684
- static_cast<size_t>(capacity), FLAGS_cache_numshardbits,
2685
- false /*strict_capacity_limit*/, FLAGS_cache_high_pri_pool_ratio);
2686
2838
  }
2839
+ #ifndef ROCKSDB_LITE
2840
+ if (!FLAGS_secondary_cache_uri.empty()) {
2841
+ Status s = SecondaryCache::CreateFromString(
2842
+ ConfigOptions(), FLAGS_secondary_cache_uri, &secondary_cache);
2843
+ if (secondary_cache == nullptr) {
2844
+ fprintf(
2845
+ stderr,
2846
+ "No secondary cache registered matching string: %s status=%s\n",
2847
+ FLAGS_secondary_cache_uri.c_str(), s.ToString().c_str());
2848
+ exit(1);
2849
+ }
2850
+ opts.secondary_cache = secondary_cache;
2851
+ }
2852
+ #endif // ROCKSDB_LITE
2853
+
2854
+ if (FLAGS_use_compressed_secondary_cache) {
2855
+ CompressedSecondaryCacheOptions secondary_cache_opts;
2856
+ secondary_cache_opts.capacity = FLAGS_compressed_secondary_cache_size;
2857
+ secondary_cache_opts.num_shard_bits =
2858
+ FLAGS_compressed_secondary_cache_numshardbits;
2859
+ secondary_cache_opts.high_pri_pool_ratio =
2860
+ FLAGS_compressed_secondary_cache_high_pri_pool_ratio;
2861
+ secondary_cache_opts.compression_type =
2862
+ FLAGS_compressed_secondary_cache_compression_type_e;
2863
+ secondary_cache_opts.compress_format_version =
2864
+ FLAGS_compressed_secondary_cache_compress_format_version;
2865
+ opts.secondary_cache =
2866
+ NewCompressedSecondaryCache(secondary_cache_opts);
2867
+ }
2868
+
2869
+ return NewLRUCache(opts);
2687
2870
  }
2688
2871
  }
2689
2872
 
@@ -2691,18 +2874,12 @@ class Benchmark {
2691
2874
  Benchmark()
2692
2875
  : cache_(NewCache(FLAGS_cache_size)),
2693
2876
  compressed_cache_(NewCache(FLAGS_compressed_cache_size)),
2694
- filter_policy_(
2695
- FLAGS_use_ribbon_filter
2696
- ? NewExperimentalRibbonFilterPolicy(FLAGS_bloom_bits)
2697
- : FLAGS_bloom_bits >= 0
2698
- ? NewBloomFilterPolicy(FLAGS_bloom_bits,
2699
- FLAGS_use_block_based_filter)
2700
- : nullptr),
2701
2877
  prefix_extractor_(NewFixedPrefixTransform(FLAGS_prefix_size)),
2702
2878
  num_(FLAGS_num),
2703
2879
  key_size_(FLAGS_key_size),
2704
2880
  user_timestamp_size_(FLAGS_user_timestamp_size),
2705
2881
  prefix_size_(FLAGS_prefix_size),
2882
+ total_thread_count_(0),
2706
2883
  keys_per_prefix_(FLAGS_keys_per_prefix),
2707
2884
  entries_per_batch_(1),
2708
2885
  reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
@@ -2715,11 +2892,11 @@ class Benchmark {
2715
2892
  merge_keys_(FLAGS_merge_keys < 0 ? FLAGS_num : FLAGS_merge_keys),
2716
2893
  report_file_operations_(FLAGS_report_file_operations),
2717
2894
  #ifndef ROCKSDB_LITE
2718
- use_blob_db_(FLAGS_use_blob_db)
2895
+ use_blob_db_(FLAGS_use_blob_db), // Stacked BlobDB
2719
2896
  #else
2720
- use_blob_db_(false)
2897
+ use_blob_db_(false), // Stacked BlobDB
2721
2898
  #endif // !ROCKSDB_LITE
2722
- {
2899
+ read_operands_(false) {
2723
2900
  // use simcache instead of cache
2724
2901
  if (FLAGS_simcache_size >= 0) {
2725
2902
  if (FLAGS_cache_numshardbits >= 1) {
@@ -2731,13 +2908,9 @@ class Benchmark {
2731
2908
  }
2732
2909
 
2733
2910
  if (report_file_operations_) {
2734
- if (!FLAGS_hdfs.empty()) {
2735
- fprintf(stderr,
2736
- "--hdfs and --report_file_operations cannot be enabled "
2737
- "at the same time");
2738
- exit(1);
2739
- }
2740
- FLAGS_env = new ReportFileOpEnv(FLAGS_env);
2911
+ FLAGS_env = new CompositeEnvWrapper(
2912
+ FLAGS_env,
2913
+ std::make_shared<CountedFileSystem>(FLAGS_env->GetFileSystem()));
2741
2914
  }
2742
2915
 
2743
2916
  if (FLAGS_prefix_size > FLAGS_key_size) {
@@ -2760,6 +2933,7 @@ class Benchmark {
2760
2933
  }
2761
2934
  #ifndef ROCKSDB_LITE
2762
2935
  if (use_blob_db_) {
2936
+ // Stacked BlobDB
2763
2937
  blob_db::DestroyBlobDB(FLAGS_db, options, blob_db::BlobDBOptions());
2764
2938
  }
2765
2939
  #endif // !ROCKSDB_LITE
@@ -2782,10 +2956,19 @@ class Benchmark {
2782
2956
  }
2783
2957
  }
2784
2958
 
2785
- ~Benchmark() {
2959
+ void DeleteDBs() {
2786
2960
  db_.DeleteDBs();
2961
+ for (const DBWithColumnFamilies& dbwcf : multi_dbs_) {
2962
+ delete dbwcf.db;
2963
+ }
2964
+ }
2965
+
2966
+ ~Benchmark() {
2967
+ DeleteDBs();
2787
2968
  delete prefix_extractor_;
2788
2969
  if (cache_.get() != nullptr) {
2970
+ // Clear cache reference first
2971
+ open_options_.write_buffer_manager.reset();
2789
2972
  // this will leak, but we're shutting down so nobody cares
2790
2973
  cache_->DisownData();
2791
2974
  }
@@ -2914,10 +3097,7 @@ class Benchmark {
2914
3097
  }
2915
3098
 
2916
3099
  void ErrorExit() {
2917
- db_.DeleteDBs();
2918
- for (size_t i = 0; i < multi_dbs_.size(); i++) {
2919
- delete multi_dbs_[i].db;
2920
- }
3100
+ DeleteDBs();
2921
3101
  exit(1);
2922
3102
  }
2923
3103
 
@@ -2926,7 +3106,7 @@ class Benchmark {
2926
3106
  ErrorExit();
2927
3107
  }
2928
3108
  Open(&open_options_);
2929
- PrintHeader();
3109
+ PrintHeader(open_options_);
2930
3110
  std::stringstream benchmark_stream(FLAGS_benchmarks);
2931
3111
  std::string name;
2932
3112
  std::unique_ptr<ExpiredTimeFilter> filter;
@@ -2949,6 +3129,17 @@ class Benchmark {
2949
3129
  write_options_.sync = true;
2950
3130
  }
2951
3131
  write_options_.disableWAL = FLAGS_disable_wal;
3132
+ write_options_.rate_limiter_priority =
3133
+ FLAGS_rate_limit_auto_wal_flush ? Env::IO_USER : Env::IO_TOTAL;
3134
+ read_options_ = ReadOptions(FLAGS_verify_checksum, true);
3135
+ read_options_.total_order_seek = FLAGS_total_order_seek;
3136
+ read_options_.prefix_same_as_start = FLAGS_prefix_same_as_start;
3137
+ read_options_.rate_limiter_priority =
3138
+ FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL;
3139
+ read_options_.tailing = FLAGS_use_tailing_iterator;
3140
+ read_options_.readahead_size = FLAGS_readahead_size;
3141
+ read_options_.adaptive_readahead = FLAGS_adaptive_readahead;
3142
+ read_options_.async_io = FLAGS_async_io;
2952
3143
 
2953
3144
  void (Benchmark::*method)(ThreadState*) = nullptr;
2954
3145
  void (Benchmark::*post_process_method)() = nullptr;
@@ -3019,12 +3210,13 @@ class Benchmark {
3019
3210
  } else if (name == "fillrandom") {
3020
3211
  fresh_db = true;
3021
3212
  method = &Benchmark::WriteRandom;
3022
- } else if (name == "filluniquerandom") {
3213
+ } else if (name == "filluniquerandom" ||
3214
+ name == "fillanddeleteuniquerandom") {
3023
3215
  fresh_db = true;
3024
3216
  if (num_threads > 1) {
3025
3217
  fprintf(stderr,
3026
- "filluniquerandom multithreaded not supported"
3027
- ", use 1 thread");
3218
+ "filluniquerandom and fillanddeleteuniquerandom "
3219
+ "multithreaded not supported, use 1 thread");
3028
3220
  num_threads = 1;
3029
3221
  }
3030
3222
  method = &Benchmark::WriteUniqueRandom;
@@ -3136,10 +3328,24 @@ class Benchmark {
3136
3328
  method = &Benchmark::Compact;
3137
3329
  } else if (name == "compactall") {
3138
3330
  CompactAll();
3331
+ #ifndef ROCKSDB_LITE
3332
+ } else if (name == "compact0") {
3333
+ CompactLevel(0);
3334
+ } else if (name == "compact1") {
3335
+ CompactLevel(1);
3336
+ } else if (name == "waitforcompaction") {
3337
+ WaitForCompaction();
3338
+ #endif
3339
+ } else if (name == "flush") {
3340
+ Flush();
3139
3341
  } else if (name == "crc32c") {
3140
3342
  method = &Benchmark::Crc32c;
3141
3343
  } else if (name == "xxhash") {
3142
3344
  method = &Benchmark::xxHash;
3345
+ } else if (name == "xxhash64") {
3346
+ method = &Benchmark::xxHash64;
3347
+ } else if (name == "xxh3") {
3348
+ method = &Benchmark::xxh3;
3143
3349
  } else if (name == "acquireload") {
3144
3350
  method = &Benchmark::AcquireLoad;
3145
3351
  } else if (name == "compress") {
@@ -3171,10 +3377,19 @@ class Benchmark {
3171
3377
  VerifyDBFromDB(FLAGS_truth_db);
3172
3378
  } else if (name == "levelstats") {
3173
3379
  PrintStats("rocksdb.levelstats");
3380
+ } else if (name == "memstats") {
3381
+ std::vector<std::string> keys{"rocksdb.num-immutable-mem-table",
3382
+ "rocksdb.cur-size-active-mem-table",
3383
+ "rocksdb.cur-size-all-mem-tables",
3384
+ "rocksdb.size-all-mem-tables",
3385
+ "rocksdb.num-entries-active-mem-table",
3386
+ "rocksdb.num-entries-imm-mem-tables"};
3387
+ PrintStats(keys);
3174
3388
  } else if (name == "sstables") {
3175
3389
  PrintStats("rocksdb.sstables");
3176
3390
  } else if (name == "stats_history") {
3177
3391
  PrintStatsHistory();
3392
+ #ifndef ROCKSDB_LITE
3178
3393
  } else if (name == "replay") {
3179
3394
  if (num_threads > 1) {
3180
3395
  fprintf(stderr, "Multi-threaded replay is not yet supported\n");
@@ -3185,8 +3400,18 @@ class Benchmark {
3185
3400
  ErrorExit();
3186
3401
  }
3187
3402
  method = &Benchmark::Replay;
3403
+ #endif // ROCKSDB_LITE
3188
3404
  } else if (name == "getmergeoperands") {
3189
3405
  method = &Benchmark::GetMergeOperands;
3406
+ #ifndef ROCKSDB_LITE
3407
+ } else if (name == "verifychecksum") {
3408
+ method = &Benchmark::VerifyChecksum;
3409
+ } else if (name == "verifyfilechecksums") {
3410
+ method = &Benchmark::VerifyFileChecksums;
3411
+ #endif // ROCKSDB_LITE
3412
+ } else if (name == "readrandomoperands") {
3413
+ read_operands_ = true;
3414
+ method = &Benchmark::ReadRandom;
3190
3415
  } else if (!name.empty()) { // No error message for empty name
3191
3416
  fprintf(stderr, "unknown benchmark '%s'\n", name.c_str());
3192
3417
  ErrorExit();
@@ -3441,7 +3666,8 @@ class Benchmark {
3441
3666
  arg[i].bm = this;
3442
3667
  arg[i].method = method;
3443
3668
  arg[i].shared = &shared;
3444
- arg[i].thread = new ThreadState(i);
3669
+ total_thread_count_++;
3670
+ arg[i].thread = new ThreadState(i, total_thread_count_);
3445
3671
  arg[i].thread->stats.SetReporterAgent(reporter_agent.get());
3446
3672
  arg[i].thread->shared = &shared;
3447
3673
  FLAGS_env->StartThread(ThreadBody, &arg[i]);
@@ -3474,44 +3700,42 @@ class Benchmark {
3474
3700
  return merge_stats;
3475
3701
  }
3476
3702
 
3477
- void Crc32c(ThreadState* thread) {
3478
- // Checksum about 500MB of data total
3703
+ template <OperationType kOpType, typename FnType, typename... Args>
3704
+ static inline void ChecksumBenchmark(FnType fn, ThreadState* thread,
3705
+ Args... args) {
3479
3706
  const int size = FLAGS_block_size; // use --block_size option for db_bench
3480
3707
  std::string labels = "(" + ToString(FLAGS_block_size) + " per op)";
3481
3708
  const char* label = labels.c_str();
3482
3709
 
3483
3710
  std::string data(size, 'x');
3484
- int64_t bytes = 0;
3485
- uint32_t crc = 0;
3486
- while (bytes < 500 * 1048576) {
3487
- crc = crc32c::Value(data.data(), size);
3488
- thread->stats.FinishedOps(nullptr, nullptr, 1, kCrc);
3711
+ uint64_t bytes = 0;
3712
+ uint32_t val = 0;
3713
+ while (bytes < 5000U * uint64_t{1048576}) { // ~5GB
3714
+ val += static_cast<uint32_t>(fn(data.data(), size, args...));
3715
+ thread->stats.FinishedOps(nullptr, nullptr, 1, kOpType);
3489
3716
  bytes += size;
3490
3717
  }
3491
3718
  // Print so result is not dead
3492
- fprintf(stderr, "... crc=0x%x\r", static_cast<unsigned int>(crc));
3719
+ fprintf(stderr, "... val=0x%x\r", static_cast<unsigned int>(val));
3493
3720
 
3494
3721
  thread->stats.AddBytes(bytes);
3495
3722
  thread->stats.AddMessage(label);
3496
3723
  }
3497
3724
 
3725
+ void Crc32c(ThreadState* thread) {
3726
+ ChecksumBenchmark<kCrc>(crc32c::Value, thread);
3727
+ }
3728
+
3498
3729
  void xxHash(ThreadState* thread) {
3499
- // Checksum about 500MB of data total
3500
- const int size = 4096;
3501
- const char* label = "(4K per op)";
3502
- std::string data(size, 'x');
3503
- int64_t bytes = 0;
3504
- unsigned int xxh32 = 0;
3505
- while (bytes < 500 * 1048576) {
3506
- xxh32 = XXH32(data.data(), size, 0);
3507
- thread->stats.FinishedOps(nullptr, nullptr, 1, kHash);
3508
- bytes += size;
3509
- }
3510
- // Print so result is not dead
3511
- fprintf(stderr, "... xxh32=0x%x\r", static_cast<unsigned int>(xxh32));
3730
+ ChecksumBenchmark<kHash>(XXH32, thread, /*seed*/ 0);
3731
+ }
3512
3732
 
3513
- thread->stats.AddBytes(bytes);
3514
- thread->stats.AddMessage(label);
3733
+ void xxHash64(ThreadState* thread) {
3734
+ ChecksumBenchmark<kHash>(XXH64, thread, /*seed*/ 0);
3735
+ }
3736
+
3737
+ void xxh3(ThreadState* thread) {
3738
+ ChecksumBenchmark<kHash>(XXH3_64bits, thread);
3515
3739
  }
3516
3740
 
3517
3741
  void AcquireLoad(ThreadState* thread) {
@@ -3627,6 +3851,8 @@ class Benchmark {
3627
3851
  void InitializeOptionsFromFlags(Options* opts) {
3628
3852
  printf("Initializing RocksDB Options from command-line flags\n");
3629
3853
  Options& options = *opts;
3854
+ ConfigOptions config_options(options);
3855
+ config_options.ignore_unsupported_options = false;
3630
3856
 
3631
3857
  assert(db_.db == nullptr);
3632
3858
 
@@ -3656,11 +3882,14 @@ class Benchmark {
3656
3882
  options.use_direct_reads = FLAGS_use_direct_reads;
3657
3883
  options.use_direct_io_for_flush_and_compaction =
3658
3884
  FLAGS_use_direct_io_for_flush_and_compaction;
3885
+ options.manual_wal_flush = FLAGS_manual_wal_flush;
3886
+ options.wal_compression = FLAGS_wal_compression_e;
3659
3887
  #ifndef ROCKSDB_LITE
3660
3888
  options.ttl = FLAGS_fifo_compaction_ttl;
3661
3889
  options.compaction_options_fifo = CompactionOptionsFIFO(
3662
3890
  FLAGS_fifo_compaction_max_table_files_size_mb * 1024 * 1024,
3663
3891
  FLAGS_fifo_compaction_allow_compaction);
3892
+ options.compaction_options_fifo.age_for_warm = FLAGS_fifo_age_for_warm;
3664
3893
  #endif // ROCKSDB_LITE
3665
3894
  if (FLAGS_prefix_size != 0) {
3666
3895
  options.prefix_extractor.reset(
@@ -3686,8 +3915,6 @@ class Benchmark {
3686
3915
  }
3687
3916
  options.bloom_locality = FLAGS_bloom_locality;
3688
3917
  options.max_file_opening_threads = FLAGS_file_opening_threads;
3689
- options.new_table_reader_for_compaction_inputs =
3690
- FLAGS_new_table_reader_for_compaction_inputs;
3691
3918
  options.compaction_readahead_size = FLAGS_compaction_readahead_size;
3692
3919
  options.log_readahead_size = FLAGS_log_readahead_size;
3693
3920
  options.random_access_max_buffer_size = FLAGS_random_access_max_buffer_size;
@@ -3701,47 +3928,30 @@ class Benchmark {
3701
3928
  FLAGS_level_compaction_dynamic_level_bytes;
3702
3929
  options.max_bytes_for_level_multiplier =
3703
3930
  FLAGS_max_bytes_for_level_multiplier;
3704
- if ((FLAGS_prefix_size == 0) && (FLAGS_rep_factory == kPrefixHash ||
3705
- FLAGS_rep_factory == kHashLinkedList)) {
3931
+ Status s =
3932
+ CreateMemTableRepFactory(config_options, &options.memtable_factory);
3933
+ if (!s.ok()) {
3934
+ fprintf(stderr, "Could not create memtable factory: %s\n",
3935
+ s.ToString().c_str());
3936
+ exit(1);
3937
+ } else if ((FLAGS_prefix_size == 0) &&
3938
+ (options.memtable_factory->IsInstanceOf("prefix_hash") ||
3939
+ options.memtable_factory->IsInstanceOf("hash_linkedlist"))) {
3706
3940
  fprintf(stderr, "prefix_size should be non-zero if PrefixHash or "
3707
3941
  "HashLinkedList memtablerep is used\n");
3708
3942
  exit(1);
3709
3943
  }
3710
- switch (FLAGS_rep_factory) {
3711
- case kSkipList:
3712
- options.memtable_factory.reset(new SkipListFactory(
3713
- FLAGS_skip_list_lookahead));
3714
- break;
3715
- #ifndef ROCKSDB_LITE
3716
- case kPrefixHash:
3717
- options.memtable_factory.reset(
3718
- NewHashSkipListRepFactory(FLAGS_hash_bucket_count));
3719
- break;
3720
- case kHashLinkedList:
3721
- options.memtable_factory.reset(NewHashLinkListRepFactory(
3722
- FLAGS_hash_bucket_count));
3723
- break;
3724
- case kVectorRep:
3725
- options.memtable_factory.reset(
3726
- new VectorRepFactory
3727
- );
3728
- break;
3729
- #else
3730
- default:
3731
- fprintf(stderr, "Only skip list is supported in lite mode\n");
3732
- exit(1);
3733
- #endif // ROCKSDB_LITE
3734
- }
3735
3944
  if (FLAGS_use_plain_table) {
3736
3945
  #ifndef ROCKSDB_LITE
3737
- if (FLAGS_rep_factory != kPrefixHash &&
3738
- FLAGS_rep_factory != kHashLinkedList) {
3739
- fprintf(stderr, "Waring: plain table is used with skipList\n");
3946
+ if (!options.memtable_factory->IsInstanceOf("prefix_hash") &&
3947
+ !options.memtable_factory->IsInstanceOf("hash_linkedlist")) {
3948
+ fprintf(stderr, "Warning: plain table is used with %s\n",
3949
+ options.memtable_factory->Name());
3740
3950
  }
3741
3951
 
3742
3952
  int bloom_bits_per_key = FLAGS_bloom_bits;
3743
3953
  if (bloom_bits_per_key < 0) {
3744
- bloom_bits_per_key = 0;
3954
+ bloom_bits_per_key = PlainTableOptions().bloom_bits_per_key;
3745
3955
  }
3746
3956
 
3747
3957
  PlainTableOptions plain_table_options;
@@ -3777,6 +3987,8 @@ class Benchmark {
3777
3987
  #endif // ROCKSDB_LITE
3778
3988
  } else {
3779
3989
  BlockBasedTableOptions block_based_options;
3990
+ block_based_options.checksum =
3991
+ static_cast<ChecksumType>(FLAGS_checksum_type);
3780
3992
  if (FLAGS_use_hash_search) {
3781
3993
  if (FLAGS_prefix_size == 0) {
3782
3994
  fprintf(stderr,
@@ -3843,18 +4055,35 @@ class Benchmark {
3843
4055
  true;
3844
4056
  }
3845
4057
  block_based_options.block_cache = cache_;
4058
+ block_based_options.reserve_table_reader_memory =
4059
+ FLAGS_reserve_table_reader_memory;
3846
4060
  block_based_options.block_cache_compressed = compressed_cache_;
3847
4061
  block_based_options.block_size = FLAGS_block_size;
3848
4062
  block_based_options.block_restart_interval = FLAGS_block_restart_interval;
3849
4063
  block_based_options.index_block_restart_interval =
3850
4064
  FLAGS_index_block_restart_interval;
3851
- block_based_options.filter_policy = filter_policy_;
3852
4065
  block_based_options.format_version =
3853
4066
  static_cast<uint32_t>(FLAGS_format_version);
3854
4067
  block_based_options.read_amp_bytes_per_bit = FLAGS_read_amp_bytes_per_bit;
3855
4068
  block_based_options.enable_index_compression =
3856
4069
  FLAGS_enable_index_compression;
3857
4070
  block_based_options.block_align = FLAGS_block_align;
4071
+ block_based_options.whole_key_filtering = FLAGS_whole_key_filtering;
4072
+ BlockBasedTableOptions::PrepopulateBlockCache prepopulate_block_cache =
4073
+ block_based_options.prepopulate_block_cache;
4074
+ switch (FLAGS_prepopulate_block_cache) {
4075
+ case 0:
4076
+ prepopulate_block_cache =
4077
+ BlockBasedTableOptions::PrepopulateBlockCache::kDisable;
4078
+ break;
4079
+ case 1:
4080
+ prepopulate_block_cache =
4081
+ BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly;
4082
+ break;
4083
+ default:
4084
+ fprintf(stderr, "Unknown prepopulate block cache mode\n");
4085
+ }
4086
+ block_based_options.prepopulate_block_cache = prepopulate_block_cache;
3858
4087
  if (FLAGS_use_data_block_hash_index) {
3859
4088
  block_based_options.data_block_index_type =
3860
4089
  ROCKSDB_NAMESPACE::BlockBasedTableOptions::kDataBlockBinaryAndHash;
@@ -3923,6 +4152,9 @@ class Benchmark {
3923
4152
  options.level0_slowdown_writes_trigger =
3924
4153
  FLAGS_level0_slowdown_writes_trigger;
3925
4154
  options.compression = FLAGS_compression_type_e;
4155
+ if (FLAGS_simulate_hybrid_fs_file != "") {
4156
+ options.bottommost_temperature = Temperature::kWarm;
4157
+ }
3926
4158
  options.sample_for_compression = FLAGS_sample_for_compression;
3927
4159
  options.WAL_ttl_seconds = FLAGS_wal_ttl_seconds;
3928
4160
  options.WAL_size_limit_MB = FLAGS_wal_size_limit_MB;
@@ -3939,8 +4171,6 @@ class Benchmark {
3939
4171
  options.compression_per_level[i] = FLAGS_compression_type_e;
3940
4172
  }
3941
4173
  }
3942
- options.soft_rate_limit = FLAGS_soft_rate_limit;
3943
- options.hard_rate_limit = FLAGS_hard_rate_limit;
3944
4174
  options.soft_pending_compaction_bytes_limit =
3945
4175
  FLAGS_soft_pending_compaction_bytes_limit;
3946
4176
  options.hard_pending_compaction_bytes_limit =
@@ -3948,6 +4178,8 @@ class Benchmark {
3948
4178
  options.delayed_write_rate = FLAGS_delayed_write_rate;
3949
4179
  options.allow_concurrent_memtable_write =
3950
4180
  FLAGS_allow_concurrent_memtable_write;
4181
+ options.experimental_mempurge_threshold =
4182
+ FLAGS_experimental_mempurge_threshold;
3951
4183
  options.inplace_update_support = FLAGS_inplace_update_support;
3952
4184
  options.inplace_update_num_locks = FLAGS_inplace_update_num_locks;
3953
4185
  options.enable_write_thread_adaptive_yield =
@@ -3956,14 +4188,16 @@ class Benchmark {
3956
4188
  options.unordered_write = FLAGS_unordered_write;
3957
4189
  options.write_thread_max_yield_usec = FLAGS_write_thread_max_yield_usec;
3958
4190
  options.write_thread_slow_yield_usec = FLAGS_write_thread_slow_yield_usec;
3959
- options.rate_limit_delay_max_milliseconds =
3960
- FLAGS_rate_limit_delay_max_milliseconds;
3961
4191
  options.table_cache_numshardbits = FLAGS_table_cache_numshardbits;
3962
4192
  options.max_compaction_bytes = FLAGS_max_compaction_bytes;
3963
4193
  options.disable_auto_compactions = FLAGS_disable_auto_compactions;
3964
4194
  options.optimize_filters_for_hits = FLAGS_optimize_filters_for_hits;
4195
+ options.paranoid_checks = FLAGS_paranoid_checks;
4196
+ options.force_consistency_checks = FLAGS_force_consistency_checks;
4197
+ options.check_flush_compaction_key_order =
4198
+ FLAGS_check_flush_compaction_key_order;
3965
4199
  options.periodic_compaction_seconds = FLAGS_periodic_compaction_seconds;
3966
-
4200
+ options.ttl = FLAGS_ttl_seconds;
3967
4201
  // fill storage options
3968
4202
  options.advise_random_on_open = FLAGS_advise_random_on_open;
3969
4203
  options.access_hint_on_compaction_start = FLAGS_compaction_fadvice_e;
@@ -3972,12 +4206,14 @@ class Benchmark {
3972
4206
  options.wal_bytes_per_sync = FLAGS_wal_bytes_per_sync;
3973
4207
 
3974
4208
  // merge operator options
3975
- options.merge_operator = MergeOperators::CreateFromStringId(
3976
- FLAGS_merge_operator);
3977
- if (options.merge_operator == nullptr && !FLAGS_merge_operator.empty()) {
3978
- fprintf(stderr, "invalid merge operator: %s\n",
3979
- FLAGS_merge_operator.c_str());
3980
- exit(1);
4209
+ if (!FLAGS_merge_operator.empty()) {
4210
+ s = MergeOperator::CreateFromString(config_options, FLAGS_merge_operator,
4211
+ &options.merge_operator);
4212
+ if (!s.ok()) {
4213
+ fprintf(stderr, "invalid merge operator[%s]: %s\n",
4214
+ FLAGS_merge_operator.c_str(), s.ToString().c_str());
4215
+ exit(1);
4216
+ }
3981
4217
  }
3982
4218
  options.max_successive_merges = FLAGS_max_successive_merges;
3983
4219
  options.report_bg_io_stats = FLAGS_report_bg_io_stats;
@@ -4005,6 +4241,8 @@ class Benchmark {
4005
4241
  }
4006
4242
  options.compaction_options_universal.allow_trivial_move =
4007
4243
  FLAGS_universal_allow_trivial_move;
4244
+ options.compaction_options_universal.incremental =
4245
+ FLAGS_universal_incremental;
4008
4246
  if (FLAGS_thread_status_per_interval > 0) {
4009
4247
  options.enable_thread_tracking = true;
4010
4248
  }
@@ -4014,9 +4252,24 @@ class Benchmark {
4014
4252
  fprintf(stderr, "Only 64 bits timestamps are supported.\n");
4015
4253
  exit(1);
4016
4254
  }
4017
- options.comparator = ROCKSDB_NAMESPACE::test::ComparatorWithU64Ts();
4255
+ options.comparator = test::BytewiseComparatorWithU64TsWrapper();
4018
4256
  }
4019
4257
 
4258
+ // Integrated BlobDB
4259
+ options.enable_blob_files = FLAGS_enable_blob_files;
4260
+ options.min_blob_size = FLAGS_min_blob_size;
4261
+ options.blob_file_size = FLAGS_blob_file_size;
4262
+ options.blob_compression_type =
4263
+ StringToCompressionType(FLAGS_blob_compression_type.c_str());
4264
+ options.enable_blob_garbage_collection =
4265
+ FLAGS_enable_blob_garbage_collection;
4266
+ options.blob_garbage_collection_age_cutoff =
4267
+ FLAGS_blob_garbage_collection_age_cutoff;
4268
+ options.blob_garbage_collection_force_threshold =
4269
+ FLAGS_blob_garbage_collection_force_threshold;
4270
+ options.blob_compaction_readahead_size =
4271
+ FLAGS_blob_compaction_readahead_size;
4272
+
4020
4273
  #ifndef ROCKSDB_LITE
4021
4274
  if (FLAGS_readonly && FLAGS_transaction_db) {
4022
4275
  fprintf(stderr, "Cannot use readonly flag with transaction_db\n");
@@ -4046,6 +4299,7 @@ class Benchmark {
4046
4299
  options.persist_stats_to_disk = FLAGS_persist_stats_to_disk;
4047
4300
  options.stats_history_buffer_size =
4048
4301
  static_cast<size_t>(FLAGS_stats_history_buffer_size);
4302
+ options.avoid_flush_during_recovery = FLAGS_avoid_flush_during_recovery;
4049
4303
 
4050
4304
  options.compression_opts.level = FLAGS_compression_level;
4051
4305
  options.compression_opts.max_dict_bytes = FLAGS_compression_max_dict_bytes;
@@ -4053,6 +4307,8 @@ class Benchmark {
4053
4307
  FLAGS_compression_zstd_max_train_bytes;
4054
4308
  options.compression_opts.parallel_threads =
4055
4309
  FLAGS_compression_parallel_threads;
4310
+ options.compression_opts.max_dict_buffer_bytes =
4311
+ FLAGS_compression_max_dict_buffer_bytes;
4056
4312
  // If this is a block based table, set some related options
4057
4313
  auto table_options =
4058
4314
  options.table_factory->GetOptions<BlockBasedTableOptions>();
@@ -4060,12 +4316,26 @@ class Benchmark {
4060
4316
  if (FLAGS_cache_size) {
4061
4317
  table_options->block_cache = cache_;
4062
4318
  }
4063
- if (FLAGS_bloom_bits >= 0) {
4319
+ if (FLAGS_bloom_bits < 0) {
4320
+ table_options->filter_policy = BlockBasedTableOptions().filter_policy;
4321
+ } else if (FLAGS_bloom_bits == 0) {
4322
+ table_options->filter_policy.reset();
4323
+ } else if (FLAGS_use_block_based_filter) {
4324
+ // Use back-door way of enabling obsolete block-based Bloom
4325
+ Status s = FilterPolicy::CreateFromString(
4326
+ ConfigOptions(),
4327
+ "rocksdb.internal.DeprecatedBlockBasedBloomFilter:" +
4328
+ ROCKSDB_NAMESPACE::ToString(FLAGS_bloom_bits),
4329
+ &table_options->filter_policy);
4330
+ if (!s.ok()) {
4331
+ fprintf(stderr, "failure creating obsolete block-based filter: %s\n",
4332
+ s.ToString().c_str());
4333
+ exit(1);
4334
+ }
4335
+ } else {
4064
4336
  table_options->filter_policy.reset(
4065
- FLAGS_use_ribbon_filter
4066
- ? NewExperimentalRibbonFilterPolicy(FLAGS_bloom_bits)
4067
- : NewBloomFilterPolicy(FLAGS_bloom_bits,
4068
- FLAGS_use_block_based_filter));
4337
+ FLAGS_use_ribbon_filter ? NewRibbonFilterPolicy(FLAGS_bloom_bits)
4338
+ : NewBloomFilterPolicy(FLAGS_bloom_bits));
4069
4339
  }
4070
4340
  }
4071
4341
  if (FLAGS_row_cache_size) {
@@ -4090,15 +4360,8 @@ class Benchmark {
4090
4360
  }
4091
4361
 
4092
4362
  if (FLAGS_rate_limiter_bytes_per_sec > 0) {
4093
- if (FLAGS_rate_limit_bg_reads &&
4094
- !FLAGS_new_table_reader_for_compaction_inputs) {
4095
- fprintf(stderr,
4096
- "rate limit compaction reads must have "
4097
- "new_table_reader_for_compaction_inputs set\n");
4098
- exit(1);
4099
- }
4100
4363
  options.rate_limiter.reset(NewGenericRateLimiter(
4101
- FLAGS_rate_limiter_bytes_per_sec, 100 * 1000 /* refill_period_us */,
4364
+ FLAGS_rate_limiter_bytes_per_sec, FLAGS_rate_limiter_refill_period_us,
4102
4365
  10 /* fairness */,
4103
4366
  FLAGS_rate_limit_bg_reads ? RateLimiter::Mode::kReadsOnly
4104
4367
  : RateLimiter::Mode::kWritesOnly,
@@ -4106,6 +4369,12 @@ class Benchmark {
4106
4369
  }
4107
4370
 
4108
4371
  options.listeners.emplace_back(listener_);
4372
+
4373
+ if (FLAGS_file_checksum) {
4374
+ options.file_checksum_gen_factory.reset(
4375
+ new FileChecksumGenCrc32cFactory());
4376
+ }
4377
+
4109
4378
  if (FLAGS_num_multi_db <= 1) {
4110
4379
  OpenDb(options, FLAGS_db, &db_);
4111
4380
  } else {
@@ -4130,7 +4399,7 @@ class Benchmark {
4130
4399
  if (FLAGS_use_existing_keys) {
4131
4400
  // Only work on single database
4132
4401
  assert(db_.db != nullptr);
4133
- ReadOptions read_opts;
4402
+ ReadOptions read_opts; // before read_options_ initialized
4134
4403
  read_opts.total_order_seek = true;
4135
4404
  Iterator* iter = db_.db->NewIterator(read_opts);
4136
4405
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
@@ -4151,6 +4420,7 @@ class Benchmark {
4151
4420
 
4152
4421
  void OpenDb(Options options, const std::string& db_name,
4153
4422
  DBWithColumnFamilies* db) {
4423
+ uint64_t open_start = FLAGS_report_open_timing ? FLAGS_env->NowNanos() : 0;
4154
4424
  Status s;
4155
4425
  // Open with column families if necessary.
4156
4426
  if (FLAGS_num_column_families > 1) {
@@ -4245,6 +4515,7 @@ class Benchmark {
4245
4515
  db->db = ptr;
4246
4516
  }
4247
4517
  } else if (FLAGS_use_blob_db) {
4518
+ // Stacked BlobDB
4248
4519
  blob_db::BlobDBOptions blob_db_options;
4249
4520
  blob_db_options.enable_garbage_collection = FLAGS_blob_db_enable_gc;
4250
4521
  blob_db_options.garbage_collection_cutoff = FLAGS_blob_db_gc_cutoff;
@@ -4290,6 +4561,11 @@ class Benchmark {
4290
4561
  } else {
4291
4562
  s = DB::Open(options, db_name, &db->db);
4292
4563
  }
4564
+ if (FLAGS_report_open_timing) {
4565
+ std::cout << "OpenDb: "
4566
+ << (FLAGS_env->NowNanos() - open_start) / 1000000.0
4567
+ << " milliseconds\n";
4568
+ }
4293
4569
  if (!s.ok()) {
4294
4570
  fprintf(stderr, "open error: %s\n", s.ToString().c_str());
4295
4571
  exit(1);
@@ -4336,7 +4612,7 @@ class Benchmark {
4336
4612
  values_[i] = i;
4337
4613
  }
4338
4614
  RandomShuffle(values_.begin(), values_.end(),
4339
- static_cast<uint32_t>(FLAGS_seed));
4615
+ static_cast<uint32_t>(seed_base));
4340
4616
  }
4341
4617
  }
4342
4618
 
@@ -4354,6 +4630,13 @@ class Benchmark {
4354
4630
  return std::numeric_limits<uint64_t>::max();
4355
4631
  }
4356
4632
 
4633
+ // Only available for UNIQUE_RANDOM mode.
4634
+ uint64_t Fetch(uint64_t index) {
4635
+ assert(mode_ == UNIQUE_RANDOM);
4636
+ assert(index < values_.size());
4637
+ return values_[index];
4638
+ }
4639
+
4357
4640
  private:
4358
4641
  Random64* rand_;
4359
4642
  WriteMode mode_;
@@ -4400,10 +4683,10 @@ class Benchmark {
4400
4683
  }
4401
4684
 
4402
4685
  Duration duration(test_duration, max_ops, ops_per_stage);
4686
+ const uint64_t num_per_key_gen = num_ + max_num_range_tombstones_;
4403
4687
  for (size_t i = 0; i < num_key_gens; i++) {
4404
4688
  key_gens[i].reset(new KeyGenerator(&(thread->rand), write_mode,
4405
- num_ + max_num_range_tombstones_,
4406
- ops_per_stage));
4689
+ num_per_key_gen, ops_per_stage));
4407
4690
  }
4408
4691
 
4409
4692
  if (num_ != FLAGS_num) {
@@ -4414,7 +4697,7 @@ class Benchmark {
4414
4697
 
4415
4698
  RandomGenerator gen;
4416
4699
  WriteBatch batch(/*reserved_bytes=*/0, /*max_bytes=*/0,
4417
- user_timestamp_size_);
4700
+ /*protection_bytes_per_key=*/0, user_timestamp_size_);
4418
4701
  Status s;
4419
4702
  int64_t bytes = 0;
4420
4703
 
@@ -4424,6 +4707,79 @@ class Benchmark {
4424
4707
  Slice begin_key = AllocateKey(&begin_key_guard);
4425
4708
  std::unique_ptr<const char[]> end_key_guard;
4426
4709
  Slice end_key = AllocateKey(&end_key_guard);
4710
+ double p = 0.0;
4711
+ uint64_t num_overwrites = 0, num_unique_keys = 0, num_selective_deletes = 0;
4712
+ // If user set overwrite_probability flag,
4713
+ // check if value is in [0.0,1.0].
4714
+ if (FLAGS_overwrite_probability > 0.0) {
4715
+ p = FLAGS_overwrite_probability > 1.0 ? 1.0 : FLAGS_overwrite_probability;
4716
+ // If overwrite set by user, and UNIQUE_RANDOM mode on,
4717
+ // the overwrite_window_size must be > 0.
4718
+ if (write_mode == UNIQUE_RANDOM && FLAGS_overwrite_window_size == 0) {
4719
+ fprintf(stderr,
4720
+ "Overwrite_window_size must be strictly greater than 0.\n");
4721
+ ErrorExit();
4722
+ }
4723
+ }
4724
+
4725
+ // Default_random_engine provides slightly
4726
+ // improved throughput over mt19937.
4727
+ std::default_random_engine overwrite_gen{
4728
+ static_cast<unsigned int>(seed_base)};
4729
+ std::bernoulli_distribution overwrite_decider(p);
4730
+
4731
+ // Inserted key window is filled with the last N
4732
+ // keys previously inserted into the DB (with
4733
+ // N=FLAGS_overwrite_window_size).
4734
+ // We use a deque struct because:
4735
+ // - random access is O(1)
4736
+ // - insertion/removal at beginning/end is also O(1).
4737
+ std::deque<int64_t> inserted_key_window;
4738
+ Random64 reservoir_id_gen(seed_base);
4739
+
4740
+ // --- Variables used in disposable/persistent keys simulation:
4741
+ // The following variables are used when
4742
+ // disposable_entries_batch_size is >0. We simualte a workload
4743
+ // where the following sequence is repeated multiple times:
4744
+ // "A set of keys S1 is inserted ('disposable entries'), then after
4745
+ // some delay another set of keys S2 is inserted ('persistent entries')
4746
+ // and the first set of keys S1 is deleted. S2 artificially represents
4747
+ // the insertion of hypothetical results from some undefined computation
4748
+ // done on the first set of keys S1. The next sequence can start as soon
4749
+ // as the last disposable entry in the set S1 of this sequence is
4750
+ // inserted, if the delay is non negligible"
4751
+ bool skip_for_loop = false, is_disposable_entry = true;
4752
+ std::vector<uint64_t> disposable_entries_index(num_key_gens, 0);
4753
+ std::vector<uint64_t> persistent_ent_and_del_index(num_key_gens, 0);
4754
+ const uint64_t kNumDispAndPersEntries =
4755
+ FLAGS_disposable_entries_batch_size +
4756
+ FLAGS_persistent_entries_batch_size;
4757
+ if (kNumDispAndPersEntries > 0) {
4758
+ if ((write_mode != UNIQUE_RANDOM) || (writes_per_range_tombstone_ > 0) ||
4759
+ (p > 0.0)) {
4760
+ fprintf(
4761
+ stderr,
4762
+ "Disposable/persistent deletes are not compatible with overwrites "
4763
+ "and DeleteRanges; and are only supported in filluniquerandom.\n");
4764
+ ErrorExit();
4765
+ }
4766
+ if (FLAGS_disposable_entries_value_size < 0 ||
4767
+ FLAGS_persistent_entries_value_size < 0) {
4768
+ fprintf(
4769
+ stderr,
4770
+ "disposable_entries_value_size and persistent_entries_value_size"
4771
+ "have to be positive.\n");
4772
+ ErrorExit();
4773
+ }
4774
+ }
4775
+ Random rnd_disposable_entry(static_cast<uint32_t>(seed_base));
4776
+ std::string random_value;
4777
+ // Queue that stores scheduled timestamp of disposable entries deletes,
4778
+ // along with starting index of disposable entry keys to delete.
4779
+ std::vector<std::queue<std::pair<uint64_t, uint64_t>>> disposable_entries_q(
4780
+ num_key_gens);
4781
+ // --- End of variables used in disposable/persistent keys simulation.
4782
+
4427
4783
  std::vector<std::unique_ptr<const char[]>> expanded_key_guards;
4428
4784
  std::vector<Slice> expanded_keys;
4429
4785
  if (FLAGS_expand_range_tombstones) {
@@ -4440,7 +4796,10 @@ class Benchmark {
4440
4796
 
4441
4797
  int64_t stage = 0;
4442
4798
  int64_t num_written = 0;
4443
- while (!duration.Done(entries_per_batch_)) {
4799
+ int64_t next_seq_db_at = num_ops;
4800
+ size_t id = 0;
4801
+
4802
+ while ((num_per_key_gen != 0) && !duration.Done(entries_per_batch_)) {
4444
4803
  if (duration.GetStage() != stage) {
4445
4804
  stage = duration.GetStage();
4446
4805
  if (db_.db != nullptr) {
@@ -4452,17 +4811,144 @@ class Benchmark {
4452
4811
  }
4453
4812
  }
4454
4813
 
4455
- size_t id = thread->rand.Next() % num_key_gens;
4814
+ if (write_mode != SEQUENTIAL) {
4815
+ id = thread->rand.Next() % num_key_gens;
4816
+ } else {
4817
+ // When doing a sequential load with multiple databases, load them in
4818
+ // order rather than all at the same time to avoid:
4819
+ // 1) long delays between flushing memtables
4820
+ // 2) flushing memtables for all of them at the same point in time
4821
+ // 3) not putting the same number of keys in each database
4822
+ if (num_written >= next_seq_db_at) {
4823
+ next_seq_db_at += num_ops;
4824
+ id++;
4825
+ if (id >= num_key_gens) {
4826
+ fprintf(stderr, "Logic error. Filled all databases\n");
4827
+ ErrorExit();
4828
+ }
4829
+ }
4830
+ }
4456
4831
  DBWithColumnFamilies* db_with_cfh = SelectDBWithCfh(id);
4832
+
4457
4833
  batch.Clear();
4458
4834
  int64_t batch_bytes = 0;
4459
4835
 
4460
4836
  for (int64_t j = 0; j < entries_per_batch_; j++) {
4461
- int64_t rand_num = key_gens[id]->Next();
4837
+ int64_t rand_num = 0;
4838
+ if ((write_mode == UNIQUE_RANDOM) && (p > 0.0)) {
4839
+ if ((inserted_key_window.size() > 0) &&
4840
+ overwrite_decider(overwrite_gen)) {
4841
+ num_overwrites++;
4842
+ rand_num = inserted_key_window[reservoir_id_gen.Next() %
4843
+ inserted_key_window.size()];
4844
+ } else {
4845
+ num_unique_keys++;
4846
+ rand_num = key_gens[id]->Next();
4847
+ if (inserted_key_window.size() < FLAGS_overwrite_window_size) {
4848
+ inserted_key_window.push_back(rand_num);
4849
+ } else {
4850
+ inserted_key_window.pop_front();
4851
+ inserted_key_window.push_back(rand_num);
4852
+ }
4853
+ }
4854
+ } else if (kNumDispAndPersEntries > 0) {
4855
+ // Check if queue is non-empty and if we need to insert
4856
+ // 'persistent' KV entries (KV entries that are never deleted)
4857
+ // and delete disposable entries previously inserted.
4858
+ if (!disposable_entries_q[id].empty() &&
4859
+ (disposable_entries_q[id].front().first <
4860
+ FLAGS_env->NowMicros())) {
4861
+ // If we need to perform a "merge op" pattern,
4862
+ // we first write all the persistent KV entries not targeted
4863
+ // by deletes, and then we write the disposable entries deletes.
4864
+ if (persistent_ent_and_del_index[id] <
4865
+ FLAGS_persistent_entries_batch_size) {
4866
+ // Generate key to insert.
4867
+ rand_num =
4868
+ key_gens[id]->Fetch(disposable_entries_q[id].front().second +
4869
+ FLAGS_disposable_entries_batch_size +
4870
+ persistent_ent_and_del_index[id]);
4871
+ persistent_ent_and_del_index[id]++;
4872
+ is_disposable_entry = false;
4873
+ skip_for_loop = false;
4874
+ } else if (persistent_ent_and_del_index[id] <
4875
+ kNumDispAndPersEntries) {
4876
+ // Find key of the entry to delete.
4877
+ rand_num =
4878
+ key_gens[id]->Fetch(disposable_entries_q[id].front().second +
4879
+ (persistent_ent_and_del_index[id] -
4880
+ FLAGS_persistent_entries_batch_size));
4881
+ persistent_ent_and_del_index[id]++;
4882
+ GenerateKeyFromInt(rand_num, FLAGS_num, &key);
4883
+ // For the delete operation, everything happens here and we
4884
+ // skip the rest of the for-loop, which is designed for
4885
+ // inserts.
4886
+ if (FLAGS_num_column_families <= 1) {
4887
+ batch.Delete(key);
4888
+ } else {
4889
+ // We use same rand_num as seed for key and column family so
4890
+ // that we can deterministically find the cfh corresponding to a
4891
+ // particular key while reading the key.
4892
+ batch.Delete(db_with_cfh->GetCfh(rand_num), key);
4893
+ }
4894
+ // A delete only includes Key+Timestamp (no value).
4895
+ batch_bytes += key_size_ + user_timestamp_size_;
4896
+ bytes += key_size_ + user_timestamp_size_;
4897
+ num_selective_deletes++;
4898
+ // Skip rest of the for-loop (j=0, j<entries_per_batch_,j++).
4899
+ skip_for_loop = true;
4900
+ } else {
4901
+ assert(false); // should never reach this point.
4902
+ }
4903
+ // If disposable_entries_q needs to be updated (ie: when a selective
4904
+ // insert+delete was successfully completed, pop the job out of the
4905
+ // queue).
4906
+ if (!disposable_entries_q[id].empty() &&
4907
+ (disposable_entries_q[id].front().first <
4908
+ FLAGS_env->NowMicros()) &&
4909
+ persistent_ent_and_del_index[id] == kNumDispAndPersEntries) {
4910
+ disposable_entries_q[id].pop();
4911
+ persistent_ent_and_del_index[id] = 0;
4912
+ }
4913
+
4914
+ // If we are deleting disposable entries, skip the rest of the
4915
+ // for-loop since there is no key-value inserts at this moment in
4916
+ // time.
4917
+ if (skip_for_loop) {
4918
+ continue;
4919
+ }
4920
+
4921
+ }
4922
+ // If no job is in the queue, then we keep inserting disposable KV
4923
+ // entries that will be deleted later by a series of deletes.
4924
+ else {
4925
+ rand_num = key_gens[id]->Fetch(disposable_entries_index[id]);
4926
+ disposable_entries_index[id]++;
4927
+ is_disposable_entry = true;
4928
+ if ((disposable_entries_index[id] %
4929
+ FLAGS_disposable_entries_batch_size) == 0) {
4930
+ // Skip the persistent KV entries inserts for now
4931
+ disposable_entries_index[id] +=
4932
+ FLAGS_persistent_entries_batch_size;
4933
+ }
4934
+ }
4935
+ } else {
4936
+ rand_num = key_gens[id]->Next();
4937
+ }
4462
4938
  GenerateKeyFromInt(rand_num, FLAGS_num, &key);
4463
- Slice val = gen.Generate();
4939
+ Slice val;
4940
+ if (kNumDispAndPersEntries > 0) {
4941
+ random_value = rnd_disposable_entry.RandomString(
4942
+ is_disposable_entry ? FLAGS_disposable_entries_value_size
4943
+ : FLAGS_persistent_entries_value_size);
4944
+ val = Slice(random_value);
4945
+ num_unique_keys++;
4946
+ } else {
4947
+ val = gen.Generate();
4948
+ }
4464
4949
  if (use_blob_db_) {
4465
4950
  #ifndef ROCKSDB_LITE
4951
+ // Stacked BlobDB
4466
4952
  blob_db::BlobDB* blobdb =
4467
4953
  static_cast<blob_db::BlobDB*>(db_with_cfh->db);
4468
4954
  if (FLAGS_blob_db_max_ttl_range > 0) {
@@ -4484,6 +4970,23 @@ class Benchmark {
4484
4970
  batch_bytes += val.size() + key_size_ + user_timestamp_size_;
4485
4971
  bytes += val.size() + key_size_ + user_timestamp_size_;
4486
4972
  ++num_written;
4973
+
4974
+ // If all disposable entries have been inserted, then we need to
4975
+ // add in the job queue a call for 'persistent entry insertions +
4976
+ // disposable entry deletions'.
4977
+ if (kNumDispAndPersEntries > 0 && is_disposable_entry &&
4978
+ ((disposable_entries_index[id] % kNumDispAndPersEntries) == 0)) {
4979
+ // Queue contains [timestamp, starting_idx],
4980
+ // timestamp = current_time + delay (minimum absolute time when to
4981
+ // start inserting the selective deletes) starting_idx = index in the
4982
+ // keygen of the rand_num to generate the key of the first KV entry to
4983
+ // delete (= key of the first selective delete).
4984
+ disposable_entries_q[id].push(std::make_pair(
4985
+ FLAGS_env->NowMicros() +
4986
+ FLAGS_disposable_entries_delete_delay /* timestamp */,
4987
+ disposable_entries_index[id] - kNumDispAndPersEntries
4988
+ /*starting idx*/));
4989
+ }
4487
4990
  if (writes_per_range_tombstone_ > 0 &&
4488
4991
  num_written > writes_before_delete_range_ &&
4489
4992
  (num_written - writes_before_delete_range_) /
@@ -4500,6 +5003,7 @@ class Benchmark {
4500
5003
  &expanded_keys[offset]);
4501
5004
  if (use_blob_db_) {
4502
5005
  #ifndef ROCKSDB_LITE
5006
+ // Stacked BlobDB
4503
5007
  s = db_with_cfh->db->Delete(write_options_,
4504
5008
  expanded_keys[offset]);
4505
5009
  #endif // ROCKSDB_LITE
@@ -4516,6 +5020,7 @@ class Benchmark {
4516
5020
  &end_key);
4517
5021
  if (use_blob_db_) {
4518
5022
  #ifndef ROCKSDB_LITE
5023
+ // Stacked BlobDB
4519
5024
  s = db_with_cfh->db->DeleteRange(
4520
5025
  write_options_, db_with_cfh->db->DefaultColumnFamily(),
4521
5026
  begin_key, end_key);
@@ -4540,7 +5045,8 @@ class Benchmark {
4540
5045
  }
4541
5046
  if (user_timestamp_size_ > 0) {
4542
5047
  Slice user_ts = mock_app_clock_->Allocate(ts_guard.get());
4543
- s = batch.AssignTimestamp(user_ts);
5048
+ s = batch.UpdateTimestamps(
5049
+ user_ts, [this](uint32_t) { return user_timestamp_size_; });
4544
5050
  if (!s.ok()) {
4545
5051
  fprintf(stderr, "assign timestamp to write batch: %s\n",
4546
5052
  s.ToString().c_str());
@@ -4548,6 +5054,7 @@ class Benchmark {
4548
5054
  }
4549
5055
  }
4550
5056
  if (!use_blob_db_) {
5057
+ // Not stacked BlobDB
4551
5058
  s = db_with_cfh->db->Write(write_options_, &batch);
4552
5059
  }
4553
5060
  thread->stats.FinishedOps(db_with_cfh, db_with_cfh->db,
@@ -4582,6 +5089,17 @@ class Benchmark {
4582
5089
  ErrorExit();
4583
5090
  }
4584
5091
  }
5092
+ if ((write_mode == UNIQUE_RANDOM) && (p > 0.0)) {
5093
+ fprintf(stdout,
5094
+ "Number of unique keys inserted: %" PRIu64
5095
+ ".\nNumber of overwrites: %" PRIu64 "\n",
5096
+ num_unique_keys, num_overwrites);
5097
+ } else if (kNumDispAndPersEntries > 0) {
5098
+ fprintf(stdout,
5099
+ "Number of unique keys inserted (disposable+persistent): %" PRIu64
5100
+ ".\nNumber of 'disposable entry delete': %" PRIu64 "\n",
5101
+ num_written, num_selective_deletes);
5102
+ }
4585
5103
  thread->stats.AddBytes(bytes);
4586
5104
  }
4587
5105
 
@@ -4860,7 +5378,7 @@ class Benchmark {
4860
5378
  }
4861
5379
  if (levelMeta.level == 0) {
4862
5380
  for (auto& fileMeta : levelMeta.files) {
4863
- fprintf(stdout, "Level[%d]: %s(size: %" ROCKSDB_PRIszt " bytes)\n",
5381
+ fprintf(stdout, "Level[%d]: %s(size: %" PRIi64 " bytes)\n",
4864
5382
  levelMeta.level, fileMeta.name.c_str(), fileMeta.size);
4865
5383
  }
4866
5384
  } else {
@@ -4901,8 +5419,7 @@ class Benchmark {
4901
5419
  }
4902
5420
 
4903
5421
  void ReadSequential(ThreadState* thread, DB* db) {
4904
- ReadOptions options(FLAGS_verify_checksum, true);
4905
- options.tailing = FLAGS_use_tailing_iterator;
5422
+ ReadOptions options = read_options_;
4906
5423
  std::unique_ptr<char[]> ts_guard;
4907
5424
  Slice ts;
4908
5425
  if (user_timestamp_size_ > 0) {
@@ -4911,6 +5428,9 @@ class Benchmark {
4911
5428
  options.timestamp = &ts;
4912
5429
  }
4913
5430
 
5431
+ options.adaptive_readahead = FLAGS_adaptive_readahead;
5432
+ options.async_io = FLAGS_async_io;
5433
+
4914
5434
  Iterator* iter = db->NewIterator(options);
4915
5435
  int64_t i = 0;
4916
5436
  int64_t bytes = 0;
@@ -4940,7 +5460,6 @@ class Benchmark {
4940
5460
  int64_t found = 0;
4941
5461
  int64_t bytes = 0;
4942
5462
  int64_t key_rand = 0;
4943
- ReadOptions options(FLAGS_verify_checksum, true);
4944
5463
  std::unique_ptr<const char[]> key_guard;
4945
5464
  Slice key = AllocateKey(&key_guard);
4946
5465
  PinnableSlice pinnable_val;
@@ -4955,11 +5474,11 @@ class Benchmark {
4955
5474
  read++;
4956
5475
  Status s;
4957
5476
  if (FLAGS_num_column_families > 1) {
4958
- s = db_with_cfh->db->Get(options, db_with_cfh->GetCfh(key_rand), key,
4959
- &pinnable_val);
5477
+ s = db_with_cfh->db->Get(read_options_, db_with_cfh->GetCfh(key_rand),
5478
+ key, &pinnable_val);
4960
5479
  } else {
4961
5480
  pinnable_val.Reset();
4962
- s = db_with_cfh->db->Get(options,
5481
+ s = db_with_cfh->db->Get(read_options_,
4963
5482
  db_with_cfh->db->DefaultColumnFamily(), key,
4964
5483
  &pinnable_val);
4965
5484
  }
@@ -5005,7 +5524,7 @@ class Benchmark {
5005
5524
  }
5006
5525
 
5007
5526
  void ReadReverse(ThreadState* thread, DB* db) {
5008
- Iterator* iter = db->NewIterator(ReadOptions(FLAGS_verify_checksum, true));
5527
+ Iterator* iter = db->NewIterator(read_options_);
5009
5528
  int64_t i = 0;
5010
5529
  int64_t bytes = 0;
5011
5530
  for (iter->SeekToLast(); i < reads_ && iter->Valid(); iter->Prev()) {
@@ -5027,7 +5546,7 @@ class Benchmark {
5027
5546
  int64_t read = 0;
5028
5547
  int64_t found = 0;
5029
5548
  int64_t nonexist = 0;
5030
- ReadOptions options(FLAGS_verify_checksum, true);
5549
+ ReadOptions options = read_options_;
5031
5550
  std::unique_ptr<const char[]> key_guard;
5032
5551
  Slice key = AllocateKey(&key_guard);
5033
5552
  std::string value;
@@ -5117,10 +5636,16 @@ class Benchmark {
5117
5636
  int64_t bytes = 0;
5118
5637
  int num_keys = 0;
5119
5638
  int64_t key_rand = 0;
5120
- ReadOptions options(FLAGS_verify_checksum, true);
5639
+ ReadOptions options = read_options_;
5121
5640
  std::unique_ptr<const char[]> key_guard;
5122
5641
  Slice key = AllocateKey(&key_guard);
5123
5642
  PinnableSlice pinnable_val;
5643
+ std::vector<PinnableSlice> pinnable_vals;
5644
+ if (read_operands_) {
5645
+ // Start off with a small-ish value that'll be increased later if
5646
+ // `GetMergeOperands()` tells us it is not large enough.
5647
+ pinnable_vals.resize(8);
5648
+ }
5124
5649
  std::unique_ptr<char[]> ts_guard;
5125
5650
  Slice ts;
5126
5651
  if (user_timestamp_size_ > 0) {
@@ -5157,18 +5682,46 @@ class Benchmark {
5157
5682
  ts_ptr = &ts_ret;
5158
5683
  }
5159
5684
  Status s;
5685
+ pinnable_val.Reset();
5686
+ for (size_t i = 0; i < pinnable_vals.size(); ++i) {
5687
+ pinnable_vals[i].Reset();
5688
+ }
5689
+ ColumnFamilyHandle* cfh;
5160
5690
  if (FLAGS_num_column_families > 1) {
5161
- s = db_with_cfh->db->Get(options, db_with_cfh->GetCfh(key_rand), key,
5162
- &pinnable_val, ts_ptr);
5691
+ cfh = db_with_cfh->GetCfh(key_rand);
5163
5692
  } else {
5164
- pinnable_val.Reset();
5165
- s = db_with_cfh->db->Get(options,
5166
- db_with_cfh->db->DefaultColumnFamily(), key,
5167
- &pinnable_val, ts_ptr);
5693
+ cfh = db_with_cfh->db->DefaultColumnFamily();
5694
+ }
5695
+ if (read_operands_) {
5696
+ GetMergeOperandsOptions get_merge_operands_options;
5697
+ get_merge_operands_options.expected_max_number_of_operands =
5698
+ static_cast<int>(pinnable_vals.size());
5699
+ int number_of_operands;
5700
+ s = db_with_cfh->db->GetMergeOperands(
5701
+ options, cfh, key, pinnable_vals.data(),
5702
+ &get_merge_operands_options, &number_of_operands);
5703
+ if (s.IsIncomplete()) {
5704
+ // Should only happen a few times when we encounter a key that had
5705
+ // more merge operands than any key seen so far. Production use case
5706
+ // would typically retry in such event to get all the operands so do
5707
+ // that here.
5708
+ pinnable_vals.resize(number_of_operands);
5709
+ get_merge_operands_options.expected_max_number_of_operands =
5710
+ static_cast<int>(pinnable_vals.size());
5711
+ s = db_with_cfh->db->GetMergeOperands(
5712
+ options, cfh, key, pinnable_vals.data(),
5713
+ &get_merge_operands_options, &number_of_operands);
5714
+ }
5715
+ } else {
5716
+ s = db_with_cfh->db->Get(options, cfh, key, &pinnable_val, ts_ptr);
5168
5717
  }
5718
+
5169
5719
  if (s.ok()) {
5170
5720
  found++;
5171
5721
  bytes += key.size() + pinnable_val.size() + user_timestamp_size_;
5722
+ for (size_t i = 0; i < pinnable_vals.size(); ++i) {
5723
+ bytes += pinnable_vals[i].size();
5724
+ }
5172
5725
  } else if (!s.IsNotFound()) {
5173
5726
  fprintf(stderr, "Get returned an error: %s\n", s.ToString().c_str());
5174
5727
  abort();
@@ -5200,9 +5753,10 @@ class Benchmark {
5200
5753
  // Returns the total number of keys found.
5201
5754
  void MultiReadRandom(ThreadState* thread) {
5202
5755
  int64_t read = 0;
5756
+ int64_t bytes = 0;
5203
5757
  int64_t num_multireads = 0;
5204
5758
  int64_t found = 0;
5205
- ReadOptions options(FLAGS_verify_checksum, true);
5759
+ ReadOptions options = read_options_;
5206
5760
  std::vector<Slice> keys;
5207
5761
  std::vector<std::unique_ptr<const char[]> > key_guards;
5208
5762
  std::vector<std::string> values(entries_per_batch_);
@@ -5250,6 +5804,7 @@ class Benchmark {
5250
5804
  num_multireads++;
5251
5805
  for (int64_t i = 0; i < entries_per_batch_; ++i) {
5252
5806
  if (statuses[i].ok()) {
5807
+ bytes += keys[i].size() + values[i].size() + user_timestamp_size_;
5253
5808
  ++found;
5254
5809
  } else if (!statuses[i].IsNotFound()) {
5255
5810
  fprintf(stderr, "MultiGet returned an error: %s\n",
@@ -5265,6 +5820,8 @@ class Benchmark {
5265
5820
  num_multireads++;
5266
5821
  for (int64_t i = 0; i < entries_per_batch_; ++i) {
5267
5822
  if (stat_list[i].ok()) {
5823
+ bytes +=
5824
+ keys[i].size() + pin_values[i].size() + user_timestamp_size_;
5268
5825
  ++found;
5269
5826
  } else if (!stat_list[i].IsNotFound()) {
5270
5827
  fprintf(stderr, "MultiGet returned an error: %s\n",
@@ -5287,6 +5844,7 @@ class Benchmark {
5287
5844
  char msg[100];
5288
5845
  snprintf(msg, sizeof(msg), "(%" PRIu64 " of %" PRIu64 " found)",
5289
5846
  found, read);
5847
+ thread->stats.AddBytes(bytes);
5290
5848
  thread->stats.AddMessage(msg);
5291
5849
  }
5292
5850
 
@@ -5547,21 +6105,22 @@ class Benchmark {
5547
6105
  }
5548
6106
  };
5549
6107
 
5550
- // The social graph wokrload mixed with Get, Put, Iterator queries.
6108
+ // The social graph workload mixed with Get, Put, Iterator queries.
5551
6109
  // The value size and iterator length follow Pareto distribution.
5552
6110
  // The overall key access follow power distribution. If user models the
5553
6111
  // workload based on different key-ranges (or different prefixes), user
5554
6112
  // can use two-term-exponential distribution to fit the workload. User
5555
- // needs to decides the ratio between Get, Put, Iterator queries before
6113
+ // needs to decide the ratio between Get, Put, Iterator queries before
5556
6114
  // starting the benchmark.
5557
6115
  void MixGraph(ThreadState* thread) {
5558
- int64_t read = 0; // including single gets and Next of iterators
5559
6116
  int64_t gets = 0;
5560
6117
  int64_t puts = 0;
5561
- int64_t found = 0;
6118
+ int64_t get_found = 0;
5562
6119
  int64_t seek = 0;
5563
6120
  int64_t seek_found = 0;
5564
6121
  int64_t bytes = 0;
6122
+ double total_scan_length = 0;
6123
+ double total_val_size = 0;
5565
6124
  const int64_t default_value_max = 1 * 1024 * 1024;
5566
6125
  int64_t value_max = default_value_max;
5567
6126
  int64_t scan_len_max = FLAGS_mix_max_scan_len;
@@ -5580,17 +6139,15 @@ class Benchmark {
5580
6139
  value_max = FLAGS_mix_max_value_size;
5581
6140
  }
5582
6141
 
5583
- ReadOptions options(FLAGS_verify_checksum, true);
5584
6142
  std::unique_ptr<const char[]> key_guard;
5585
6143
  Slice key = AllocateKey(&key_guard);
5586
6144
  PinnableSlice pinnable_val;
5587
6145
  query.Initiate(ratio);
5588
6146
 
5589
6147
  // the limit of qps initiation
5590
- if (FLAGS_sine_a != 0 || FLAGS_sine_d != 0) {
5591
- thread->shared->read_rate_limiter.reset(NewGenericRateLimiter(
5592
- static_cast<int64_t>(read_rate), 100000 /* refill_period_us */, 10 /* fairness */,
5593
- RateLimiter::Mode::kReadsOnly));
6148
+ if (FLAGS_sine_mix_rate) {
6149
+ thread->shared->read_rate_limiter.reset(
6150
+ NewGenericRateLimiter(static_cast<int64_t>(read_rate)));
5594
6151
  thread->shared->write_rate_limiter.reset(
5595
6152
  NewGenericRateLimiter(static_cast<int64_t>(write_rate)));
5596
6153
  }
@@ -5638,52 +6195,51 @@ class Benchmark {
5638
6195
  usecs_since_last = 0;
5639
6196
  }
5640
6197
 
5641
- if (usecs_since_last >
5642
- (FLAGS_sine_mix_rate_interval_milliseconds * uint64_t{1000})) {
6198
+ if (FLAGS_sine_mix_rate &&
6199
+ usecs_since_last >
6200
+ (FLAGS_sine_mix_rate_interval_milliseconds * uint64_t{1000})) {
5643
6201
  double usecs_since_start =
5644
6202
  static_cast<double>(now - thread->stats.GetStart());
5645
6203
  thread->stats.ResetSineInterval();
5646
6204
  double mix_rate_with_noise = AddNoise(
5647
6205
  SineRate(usecs_since_start / 1000000.0), FLAGS_sine_mix_rate_noise);
5648
6206
  read_rate = mix_rate_with_noise * (query.ratio_[0] + query.ratio_[2]);
5649
- write_rate =
5650
- mix_rate_with_noise * query.ratio_[1] * FLAGS_mix_ave_kv_size;
6207
+ write_rate = mix_rate_with_noise * query.ratio_[1];
5651
6208
 
5652
- thread->shared->write_rate_limiter.reset(
5653
- NewGenericRateLimiter(static_cast<int64_t>(write_rate)));
5654
- thread->shared->read_rate_limiter.reset(NewGenericRateLimiter(
5655
- static_cast<int64_t>(read_rate),
5656
- FLAGS_sine_mix_rate_interval_milliseconds * uint64_t{1000}, 10,
5657
- RateLimiter::Mode::kReadsOnly));
6209
+ if (read_rate > 0) {
6210
+ thread->shared->read_rate_limiter->SetBytesPerSecond(
6211
+ static_cast<int64_t>(read_rate));
6212
+ }
6213
+ if (write_rate > 0) {
6214
+ thread->shared->write_rate_limiter->SetBytesPerSecond(
6215
+ static_cast<int64_t>(write_rate));
6216
+ }
5658
6217
  }
5659
6218
  // Start the query
5660
6219
  if (query_type == 0) {
5661
6220
  // the Get query
5662
6221
  gets++;
5663
- read++;
5664
6222
  if (FLAGS_num_column_families > 1) {
5665
- s = db_with_cfh->db->Get(options, db_with_cfh->GetCfh(key_rand), key,
5666
- &pinnable_val);
6223
+ s = db_with_cfh->db->Get(read_options_, db_with_cfh->GetCfh(key_rand),
6224
+ key, &pinnable_val);
5667
6225
  } else {
5668
6226
  pinnable_val.Reset();
5669
- s = db_with_cfh->db->Get(options,
6227
+ s = db_with_cfh->db->Get(read_options_,
5670
6228
  db_with_cfh->db->DefaultColumnFamily(), key,
5671
6229
  &pinnable_val);
5672
6230
  }
5673
6231
 
5674
6232
  if (s.ok()) {
5675
- found++;
6233
+ get_found++;
5676
6234
  bytes += key.size() + pinnable_val.size();
5677
6235
  } else if (!s.IsNotFound()) {
5678
6236
  fprintf(stderr, "Get returned an error: %s\n", s.ToString().c_str());
5679
6237
  abort();
5680
6238
  }
5681
6239
 
5682
- if (thread->shared->read_rate_limiter.get() != nullptr &&
5683
- read % 256 == 255) {
5684
- thread->shared->read_rate_limiter->Request(
5685
- 256, Env::IO_HIGH, nullptr /* stats */,
5686
- RateLimiter::OpType::kRead);
6240
+ if (thread->shared->read_rate_limiter && (gets + seek) % 100 == 0) {
6241
+ thread->shared->read_rate_limiter->Request(100, Env::IO_HIGH,
6242
+ nullptr /*stats*/);
5687
6243
  }
5688
6244
  thread->stats.FinishedOps(db_with_cfh, db_with_cfh->db, 1, kRead);
5689
6245
  } else if (query_type == 1) {
@@ -5691,11 +6247,13 @@ class Benchmark {
5691
6247
  puts++;
5692
6248
  int64_t val_size = ParetoCdfInversion(
5693
6249
  u, FLAGS_value_theta, FLAGS_value_k, FLAGS_value_sigma);
5694
- if (val_size < 0) {
6250
+ if (val_size < 10) {
5695
6251
  val_size = 10;
5696
6252
  } else if (val_size > value_max) {
5697
6253
  val_size = val_size % value_max;
5698
6254
  }
6255
+ total_val_size += val_size;
6256
+
5699
6257
  s = db_with_cfh->db->Put(
5700
6258
  write_options_, key,
5701
6259
  gen.Generate(static_cast<unsigned int>(val_size)));
@@ -5704,21 +6262,19 @@ class Benchmark {
5704
6262
  ErrorExit();
5705
6263
  }
5706
6264
 
5707
- if (thread->shared->write_rate_limiter) {
5708
- thread->shared->write_rate_limiter->Request(
5709
- key.size() + val_size, Env::IO_HIGH, nullptr /*stats*/,
5710
- RateLimiter::OpType::kWrite);
6265
+ if (thread->shared->write_rate_limiter && puts % 100 == 0) {
6266
+ thread->shared->write_rate_limiter->Request(100, Env::IO_HIGH,
6267
+ nullptr /*stats*/);
5711
6268
  }
5712
6269
  thread->stats.FinishedOps(db_with_cfh, db_with_cfh->db, 1, kWrite);
5713
6270
  } else if (query_type == 2) {
5714
6271
  // Seek query
5715
6272
  if (db_with_cfh->db != nullptr) {
5716
6273
  Iterator* single_iter = nullptr;
5717
- single_iter = db_with_cfh->db->NewIterator(options);
6274
+ single_iter = db_with_cfh->db->NewIterator(read_options_);
5718
6275
  if (single_iter != nullptr) {
5719
6276
  single_iter->Seek(key);
5720
6277
  seek++;
5721
- read++;
5722
6278
  if (single_iter->Valid() && single_iter->key().compare(key) == 0) {
5723
6279
  seek_found++;
5724
6280
  }
@@ -5733,6 +6289,7 @@ class Benchmark {
5733
6289
  bytes += single_iter->key().size() + single_iter->value().size();
5734
6290
  single_iter->Next();
5735
6291
  assert(single_iter->status().ok());
6292
+ total_scan_length++;
5736
6293
  }
5737
6294
  }
5738
6295
  delete single_iter;
@@ -5742,9 +6299,12 @@ class Benchmark {
5742
6299
  }
5743
6300
  char msg[256];
5744
6301
  snprintf(msg, sizeof(msg),
5745
- "( Gets:%" PRIu64 " Puts:%" PRIu64 " Seek:%" PRIu64 " of %" PRIu64
5746
- " in %" PRIu64 " found)\n",
5747
- gets, puts, seek, found, read);
6302
+ "( Gets:%" PRIu64 " Puts:%" PRIu64 " Seek:%" PRIu64
6303
+ ", reads %" PRIu64 " in %" PRIu64
6304
+ " found, "
6305
+ "avg size: %.1f value, %.1f scan)\n",
6306
+ gets, puts, seek, get_found + seek_found, gets + seek,
6307
+ total_val_size / puts, total_scan_length / seek);
5748
6308
 
5749
6309
  thread->stats.AddBytes(bytes);
5750
6310
  thread->stats.AddMessage(msg);
@@ -5757,7 +6317,7 @@ class Benchmark {
5757
6317
 
5758
6318
  void IteratorCreation(ThreadState* thread) {
5759
6319
  Duration duration(FLAGS_duration, reads_);
5760
- ReadOptions options(FLAGS_verify_checksum, true);
6320
+ ReadOptions options = read_options_;
5761
6321
  std::unique_ptr<char[]> ts_guard;
5762
6322
  if (user_timestamp_size_ > 0) {
5763
6323
  ts_guard.reset(new char[user_timestamp_size_]);
@@ -5787,11 +6347,7 @@ class Benchmark {
5787
6347
  int64_t read = 0;
5788
6348
  int64_t found = 0;
5789
6349
  int64_t bytes = 0;
5790
- ReadOptions options(FLAGS_verify_checksum, true);
5791
- options.total_order_seek = FLAGS_total_order_seek;
5792
- options.prefix_same_as_start = FLAGS_prefix_same_as_start;
5793
- options.tailing = FLAGS_use_tailing_iterator;
5794
- options.readahead_size = FLAGS_readahead_size;
6350
+ ReadOptions options = read_options_;
5795
6351
  std::unique_ptr<char[]> ts_guard;
5796
6352
  Slice ts;
5797
6353
  if (user_timestamp_size_ > 0) {
@@ -5800,13 +6356,14 @@ class Benchmark {
5800
6356
  options.timestamp = &ts;
5801
6357
  }
5802
6358
 
5803
- Iterator* single_iter = nullptr;
5804
- std::vector<Iterator*> multi_iters;
5805
- if (db_.db != nullptr) {
5806
- single_iter = db_.db->NewIterator(options);
5807
- } else {
5808
- for (const auto& db_with_cfh : multi_dbs_) {
5809
- multi_iters.push_back(db_with_cfh.db->NewIterator(options));
6359
+ std::vector<Iterator*> tailing_iters;
6360
+ if (FLAGS_use_tailing_iterator) {
6361
+ if (db_.db != nullptr) {
6362
+ tailing_iters.push_back(db_.db->NewIterator(options));
6363
+ } else {
6364
+ for (const auto& db_with_cfh : multi_dbs_) {
6365
+ tailing_iters.push_back(db_with_cfh.db->NewIterator(options));
6366
+ }
5810
6367
  }
5811
6368
  }
5812
6369
 
@@ -5840,24 +6397,22 @@ class Benchmark {
5840
6397
  }
5841
6398
  }
5842
6399
 
5843
- if (!FLAGS_use_tailing_iterator) {
6400
+ // Pick a Iterator to use
6401
+ uint64_t db_idx_to_use =
6402
+ (db_.db == nullptr)
6403
+ ? (uint64_t{thread->rand.Next()} % multi_dbs_.size())
6404
+ : 0;
6405
+ std::unique_ptr<Iterator> single_iter;
6406
+ Iterator* iter_to_use;
6407
+ if (FLAGS_use_tailing_iterator) {
6408
+ iter_to_use = tailing_iters[db_idx_to_use];
6409
+ } else {
5844
6410
  if (db_.db != nullptr) {
5845
- delete single_iter;
5846
- single_iter = db_.db->NewIterator(options);
6411
+ single_iter.reset(db_.db->NewIterator(options));
5847
6412
  } else {
5848
- for (auto iter : multi_iters) {
5849
- delete iter;
5850
- }
5851
- multi_iters.clear();
5852
- for (const auto& db_with_cfh : multi_dbs_) {
5853
- multi_iters.push_back(db_with_cfh.db->NewIterator(options));
5854
- }
6413
+ single_iter.reset(multi_dbs_[db_idx_to_use].db->NewIterator(options));
5855
6414
  }
5856
- }
5857
- // Pick a Iterator to use
5858
- Iterator* iter_to_use = single_iter;
5859
- if (single_iter == nullptr) {
5860
- iter_to_use = multi_iters[thread->rand.Next() % multi_iters.size()];
6415
+ iter_to_use = single_iter.get();
5861
6416
  }
5862
6417
 
5863
6418
  iter_to_use->Seek(key);
@@ -5889,8 +6444,7 @@ class Benchmark {
5889
6444
 
5890
6445
  thread->stats.FinishedOps(&db_, db_.db, 1, kSeek);
5891
6446
  }
5892
- delete single_iter;
5893
- for (auto iter : multi_iters) {
6447
+ for (auto iter : tailing_iters) {
5894
6448
  delete iter;
5895
6449
  }
5896
6450
 
@@ -5923,7 +6477,7 @@ class Benchmark {
5923
6477
 
5924
6478
  void DoDelete(ThreadState* thread, bool seq) {
5925
6479
  WriteBatch batch(/*reserved_bytes=*/0, /*max_bytes=*/0,
5926
- user_timestamp_size_);
6480
+ /*protection_bytes_per_key=*/0, user_timestamp_size_);
5927
6481
  Duration duration(seq ? 0 : FLAGS_duration, deletes_);
5928
6482
  int64_t i = 0;
5929
6483
  std::unique_ptr<const char[]> key_guard;
@@ -5945,7 +6499,8 @@ class Benchmark {
5945
6499
  Status s;
5946
6500
  if (user_timestamp_size_ > 0) {
5947
6501
  ts = mock_app_clock_->Allocate(ts_guard.get());
5948
- s = batch.AssignTimestamp(ts);
6502
+ s = batch.UpdateTimestamps(
6503
+ ts, [this](uint32_t) { return user_timestamp_size_; });
5949
6504
  if (!s.ok()) {
5950
6505
  fprintf(stderr, "assign timestamp: %s\n", s.ToString().c_str());
5951
6506
  ErrorExit();
@@ -6039,17 +6594,17 @@ class Benchmark {
6039
6594
  Slice ts;
6040
6595
  if (user_timestamp_size_ > 0) {
6041
6596
  ts = mock_app_clock_->Allocate(ts_guard.get());
6042
- write_options_.timestamp = &ts;
6043
6597
  }
6044
6598
  if (write_merge == kWrite) {
6045
- s = db->Put(write_options_, key, val);
6599
+ if (user_timestamp_size_ == 0) {
6600
+ s = db->Put(write_options_, key, val);
6601
+ } else {
6602
+ s = db->Put(write_options_, key, ts, val);
6603
+ }
6046
6604
  } else {
6047
6605
  s = db->Merge(write_options_, key, val);
6048
6606
  }
6049
6607
  // Restore write_options_
6050
- if (user_timestamp_size_ > 0) {
6051
- write_options_.timestamp = nullptr;
6052
- }
6053
6608
  written++;
6054
6609
 
6055
6610
  if (!s.ok()) {
@@ -6082,7 +6637,7 @@ class Benchmark {
6082
6637
  abort();
6083
6638
  }
6084
6639
  assert(db_.db != nullptr);
6085
- ReadOptions read_options;
6640
+ ReadOptions read_options = read_options_;
6086
6641
  std::unique_ptr<char[]> ts_guard;
6087
6642
  Slice ts;
6088
6643
  if (user_timestamp_size_ > 0) {
@@ -6122,7 +6677,7 @@ class Benchmark {
6122
6677
  std::string keys[3];
6123
6678
 
6124
6679
  WriteBatch batch(/*reserved_bytes=*/0, /*max_bytes=*/0,
6125
- user_timestamp_size_);
6680
+ /*protection_bytes_per_key=*/0, user_timestamp_size_);
6126
6681
  Status s;
6127
6682
  for (int i = 0; i < 3; i++) {
6128
6683
  keys[i] = key.ToString() + suffixes[i];
@@ -6133,7 +6688,8 @@ class Benchmark {
6133
6688
  if (user_timestamp_size_ > 0) {
6134
6689
  ts_guard.reset(new char[user_timestamp_size_]);
6135
6690
  Slice ts = mock_app_clock_->Allocate(ts_guard.get());
6136
- s = batch.AssignTimestamp(ts);
6691
+ s = batch.UpdateTimestamps(
6692
+ ts, [this](uint32_t) { return user_timestamp_size_; });
6137
6693
  if (!s.ok()) {
6138
6694
  fprintf(stderr, "assign timestamp to batch: %s\n",
6139
6695
  s.ToString().c_str());
@@ -6153,7 +6709,8 @@ class Benchmark {
6153
6709
  std::string suffixes[3] = {"1", "2", "0"};
6154
6710
  std::string keys[3];
6155
6711
 
6156
- WriteBatch batch(0, 0, user_timestamp_size_);
6712
+ WriteBatch batch(0, 0, /*protection_bytes_per_key=*/0,
6713
+ user_timestamp_size_);
6157
6714
  Status s;
6158
6715
  for (int i = 0; i < 3; i++) {
6159
6716
  keys[i] = key.ToString() + suffixes[i];
@@ -6164,7 +6721,8 @@ class Benchmark {
6164
6721
  if (user_timestamp_size_ > 0) {
6165
6722
  ts_guard.reset(new char[user_timestamp_size_]);
6166
6723
  Slice ts = mock_app_clock_->Allocate(ts_guard.get());
6167
- s = batch.AssignTimestamp(ts);
6724
+ s = batch.UpdateTimestamps(
6725
+ ts, [this](uint32_t) { return user_timestamp_size_; });
6168
6726
  if (!s.ok()) {
6169
6727
  fprintf(stderr, "assign timestamp to batch: %s\n",
6170
6728
  s.ToString().c_str());
@@ -6179,13 +6737,12 @@ class Benchmark {
6179
6737
  // Given a key K and value V, this gets values for K+"0", K+"1" and K+"2"
6180
6738
  // in the same snapshot, and verifies that all the values are identical.
6181
6739
  // ASSUMES that PutMany was used to put (K, V) into the DB.
6182
- Status GetMany(DB* db, const ReadOptions& readoptions, const Slice& key,
6183
- std::string* value) {
6740
+ Status GetMany(DB* db, const Slice& key, std::string* value) {
6184
6741
  std::string suffixes[3] = {"0", "1", "2"};
6185
6742
  std::string keys[3];
6186
6743
  Slice key_slices[3];
6187
6744
  std::string values[3];
6188
- ReadOptions readoptionscopy = readoptions;
6745
+ ReadOptions readoptionscopy = read_options_;
6189
6746
 
6190
6747
  std::unique_ptr<char[]> ts_guard;
6191
6748
  Slice ts;
@@ -6233,7 +6790,6 @@ class Benchmark {
6233
6790
  // FLAGS_numdistinct distinct keys instead of FLAGS_num distinct keys.
6234
6791
  // (d) Does not have a MultiGet option.
6235
6792
  void RandomWithVerify(ThreadState* thread) {
6236
- ReadOptions options(FLAGS_verify_checksum, true);
6237
6793
  RandomGenerator gen;
6238
6794
  std::string value;
6239
6795
  int64_t found = 0;
@@ -6260,7 +6816,7 @@ class Benchmark {
6260
6816
  FLAGS_numdistinct, &key);
6261
6817
  if (get_weight > 0) {
6262
6818
  // do all the gets first
6263
- Status s = GetMany(db, options, key, &value);
6819
+ Status s = GetMany(db, key, &value);
6264
6820
  if (!s.ok() && !s.IsNotFound()) {
6265
6821
  fprintf(stderr, "getmany error: %s\n", s.ToString().c_str());
6266
6822
  // we continue after error rather than exiting so that we can
@@ -6304,7 +6860,7 @@ class Benchmark {
6304
6860
  // This is different from ReadWhileWriting because it does not use
6305
6861
  // an extra thread.
6306
6862
  void ReadRandomWriteRandom(ThreadState* thread) {
6307
- ReadOptions options(FLAGS_verify_checksum, true);
6863
+ ReadOptions options = read_options_;
6308
6864
  RandomGenerator gen;
6309
6865
  std::string value;
6310
6866
  int64_t found = 0;
@@ -6353,12 +6909,13 @@ class Benchmark {
6353
6909
  } else if (put_weight > 0) {
6354
6910
  // then do all the corresponding number of puts
6355
6911
  // for all the gets we have done earlier
6356
- Slice ts;
6912
+ Status s;
6357
6913
  if (user_timestamp_size_ > 0) {
6358
- ts = mock_app_clock_->Allocate(ts_guard.get());
6359
- write_options_.timestamp = &ts;
6914
+ Slice ts = mock_app_clock_->Allocate(ts_guard.get());
6915
+ s = db->Put(write_options_, key, ts, gen.Generate());
6916
+ } else {
6917
+ s = db->Put(write_options_, key, gen.Generate());
6360
6918
  }
6361
- Status s = db->Put(write_options_, key, gen.Generate());
6362
6919
  if (!s.ok()) {
6363
6920
  fprintf(stderr, "put error: %s\n", s.ToString().c_str());
6364
6921
  ErrorExit();
@@ -6378,7 +6935,7 @@ class Benchmark {
6378
6935
  //
6379
6936
  // Read-modify-write for random keys
6380
6937
  void UpdateRandom(ThreadState* thread) {
6381
- ReadOptions options(FLAGS_verify_checksum, true);
6938
+ ReadOptions options = read_options_;
6382
6939
  RandomGenerator gen;
6383
6940
  std::string value;
6384
6941
  int64_t found = 0;
@@ -6419,11 +6976,13 @@ class Benchmark {
6419
6976
  }
6420
6977
 
6421
6978
  Slice val = gen.Generate();
6979
+ Status s;
6422
6980
  if (user_timestamp_size_ > 0) {
6423
6981
  ts = mock_app_clock_->Allocate(ts_guard.get());
6424
- write_options_.timestamp = &ts;
6982
+ s = db->Put(write_options_, key, ts, val);
6983
+ } else {
6984
+ s = db->Put(write_options_, key, val);
6425
6985
  }
6426
- Status s = db->Put(write_options_, key, val);
6427
6986
  if (!s.ok()) {
6428
6987
  fprintf(stderr, "put error: %s\n", s.ToString().c_str());
6429
6988
  exit(1);
@@ -6443,7 +7002,7 @@ class Benchmark {
6443
7002
  // representing the existing value, we generate an array B of the same size,
6444
7003
  // then compute C = A^B as C[i]=A[i]^B[i], and store C
6445
7004
  void XORUpdateRandom(ThreadState* thread) {
6446
- ReadOptions options(FLAGS_verify_checksum, true);
7005
+ ReadOptions options = read_options_;
6447
7006
  RandomGenerator gen;
6448
7007
  std::string existing_value;
6449
7008
  int64_t found = 0;
@@ -6486,12 +7045,13 @@ class Benchmark {
6486
7045
  xor_operator.XOR(nullptr, value, &new_value);
6487
7046
  }
6488
7047
 
7048
+ Status s;
6489
7049
  if (user_timestamp_size_ > 0) {
6490
7050
  ts = mock_app_clock_->Allocate(ts_guard.get());
6491
- write_options_.timestamp = &ts;
7051
+ s = db->Put(write_options_, key, ts, Slice(new_value));
7052
+ } else {
7053
+ s = db->Put(write_options_, key, Slice(new_value));
6492
7054
  }
6493
-
6494
- Status s = db->Put(write_options_, key, Slice(new_value));
6495
7055
  if (!s.ok()) {
6496
7056
  fprintf(stderr, "put error: %s\n", s.ToString().c_str());
6497
7057
  ErrorExit();
@@ -6508,7 +7068,7 @@ class Benchmark {
6508
7068
  // Each operation causes the key grow by value_size (simulating an append).
6509
7069
  // Generally used for benchmarking against merges of similar type
6510
7070
  void AppendRandom(ThreadState* thread) {
6511
- ReadOptions options(FLAGS_verify_checksum, true);
7071
+ ReadOptions options = read_options_;
6512
7072
  RandomGenerator gen;
6513
7073
  std::string value;
6514
7074
  int64_t found = 0;
@@ -6552,13 +7112,14 @@ class Benchmark {
6552
7112
  }
6553
7113
  value.append(operand.data(), operand.size());
6554
7114
 
7115
+ Status s;
6555
7116
  if (user_timestamp_size_ > 0) {
6556
7117
  ts = mock_app_clock_->Allocate(ts_guard.get());
6557
- write_options_.timestamp = &ts;
7118
+ s = db->Put(write_options_, key, ts, value);
7119
+ } else {
7120
+ // Write back to the database
7121
+ s = db->Put(write_options_, key, value);
6558
7122
  }
6559
-
6560
- // Write back to the database
6561
- Status s = db->Put(write_options_, key, value);
6562
7123
  if (!s.ok()) {
6563
7124
  fprintf(stderr, "put error: %s\n", s.ToString().c_str());
6564
7125
  ErrorExit();
@@ -6631,7 +7192,6 @@ class Benchmark {
6631
7192
  // As with MergeRandom, the merge operator to use should be defined by
6632
7193
  // FLAGS_merge_operator.
6633
7194
  void ReadRandomMergeRandom(ThreadState* thread) {
6634
- ReadOptions options(FLAGS_verify_checksum, true);
6635
7195
  RandomGenerator gen;
6636
7196
  std::string value;
6637
7197
  int64_t num_hits = 0;
@@ -6658,7 +7218,7 @@ class Benchmark {
6658
7218
  num_merges++;
6659
7219
  thread->stats.FinishedOps(nullptr, db, 1, kMerge);
6660
7220
  } else {
6661
- Status s = db->Get(options, key, &value);
7221
+ Status s = db->Get(read_options_, key, &value);
6662
7222
  if (value.length() > max_length)
6663
7223
  max_length = value.length();
6664
7224
 
@@ -6689,7 +7249,7 @@ class Benchmark {
6689
7249
  thread->stats.Start(thread->tid);
6690
7250
 
6691
7251
  DB* db = SelectDB(thread);
6692
- ReadOptions read_opts(FLAGS_verify_checksum, true);
7252
+ ReadOptions read_opts = read_options_;
6693
7253
  std::unique_ptr<char[]> ts_guard;
6694
7254
  Slice ts;
6695
7255
  if (user_timestamp_size_ > 0) {
@@ -6816,6 +7376,37 @@ class Benchmark {
6816
7376
  }
6817
7377
 
6818
7378
  #ifndef ROCKSDB_LITE
7379
+ void VerifyChecksum(ThreadState* thread) {
7380
+ DB* db = SelectDB(thread);
7381
+ ReadOptions ro;
7382
+ ro.adaptive_readahead = FLAGS_adaptive_readahead;
7383
+ ro.async_io = FLAGS_async_io;
7384
+ ro.rate_limiter_priority =
7385
+ FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL;
7386
+ ro.readahead_size = FLAGS_readahead_size;
7387
+ Status s = db->VerifyChecksum(ro);
7388
+ if (!s.ok()) {
7389
+ fprintf(stderr, "VerifyChecksum() failed: %s\n", s.ToString().c_str());
7390
+ exit(1);
7391
+ }
7392
+ }
7393
+
7394
+ void VerifyFileChecksums(ThreadState* thread) {
7395
+ DB* db = SelectDB(thread);
7396
+ ReadOptions ro;
7397
+ ro.adaptive_readahead = FLAGS_adaptive_readahead;
7398
+ ro.async_io = FLAGS_async_io;
7399
+ ro.rate_limiter_priority =
7400
+ FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL;
7401
+ ro.readahead_size = FLAGS_readahead_size;
7402
+ Status s = db->VerifyFileChecksums(ro);
7403
+ if (!s.ok()) {
7404
+ fprintf(stderr, "VerifyFileChecksums() failed: %s\n",
7405
+ s.ToString().c_str());
7406
+ exit(1);
7407
+ }
7408
+ }
7409
+
6819
7410
  // This benchmark stress tests Transactions. For a given --duration (or
6820
7411
  // total number of --writes, a Transaction will perform a read-modify-write
6821
7412
  // to increment the value of a key in each of N(--transaction-sets) sets of
@@ -6829,9 +7420,7 @@ class Benchmark {
6829
7420
  // RandomTransactionVerify() will then validate the correctness of the results
6830
7421
  // by checking if the sum of all keys in each set is the same.
6831
7422
  void RandomTransaction(ThreadState* thread) {
6832
- ReadOptions options(FLAGS_verify_checksum, true);
6833
7423
  Duration duration(FLAGS_duration, readwrites_);
6834
- ReadOptions read_options(FLAGS_verify_checksum, true);
6835
7424
  uint16_t num_prefix_ranges = static_cast<uint16_t>(FLAGS_transaction_sets);
6836
7425
  uint64_t transactions_done = 0;
6837
7426
 
@@ -6845,7 +7434,7 @@ class Benchmark {
6845
7434
  txn_options.set_snapshot = FLAGS_transaction_set_snapshot;
6846
7435
 
6847
7436
  RandomTransactionInserter inserter(&thread->rand, write_options_,
6848
- read_options, FLAGS_num,
7437
+ read_options_, FLAGS_num,
6849
7438
  num_prefix_ranges);
6850
7439
 
6851
7440
  if (FLAGS_num_multi_db > 1) {
@@ -6937,12 +7526,12 @@ class Benchmark {
6937
7526
  DB* db = SelectDB(thread);
6938
7527
  for (int64_t i = 0; i < FLAGS_numdistinct; i++) {
6939
7528
  GenerateKeyFromInt(i * max_counter, FLAGS_num, &key);
6940
- Slice ts;
6941
7529
  if (user_timestamp_size_ > 0) {
6942
- ts = mock_app_clock_->Allocate(ts_guard.get());
6943
- write_options_.timestamp = &ts;
7530
+ Slice ts = mock_app_clock_->Allocate(ts_guard.get());
7531
+ s = db->Put(write_options_, key, ts, gen.Generate());
7532
+ } else {
7533
+ s = db->Put(write_options_, key, gen.Generate());
6944
7534
  }
6945
- s = db->Put(write_options_, key, gen.Generate());
6946
7535
  if (!s.ok()) {
6947
7536
  fprintf(stderr, "Operation failed: %s\n", s.ToString().c_str());
6948
7537
  exit(1);
@@ -6961,22 +7550,24 @@ class Benchmark {
6961
7550
  static_cast<int64_t>(0));
6962
7551
  GenerateKeyFromInt(key_id * max_counter + counters[key_id], FLAGS_num,
6963
7552
  &key);
6964
- Slice ts;
6965
7553
  if (user_timestamp_size_ > 0) {
6966
- ts = mock_app_clock_->Allocate(ts_guard.get());
6967
- write_options_.timestamp = &ts;
7554
+ Slice ts = mock_app_clock_->Allocate(ts_guard.get());
7555
+ s = FLAGS_use_single_deletes ? db->SingleDelete(write_options_, key, ts)
7556
+ : db->Delete(write_options_, key, ts);
7557
+ } else {
7558
+ s = FLAGS_use_single_deletes ? db->SingleDelete(write_options_, key)
7559
+ : db->Delete(write_options_, key);
6968
7560
  }
6969
- s = FLAGS_use_single_deletes ? db->SingleDelete(write_options_, key)
6970
- : db->Delete(write_options_, key);
6971
7561
  if (s.ok()) {
6972
7562
  counters[key_id] = (counters[key_id] + 1) % max_counter;
6973
7563
  GenerateKeyFromInt(key_id * max_counter + counters[key_id], FLAGS_num,
6974
7564
  &key);
6975
7565
  if (user_timestamp_size_ > 0) {
6976
- ts = mock_app_clock_->Allocate(ts_guard.get());
6977
- write_options_.timestamp = &ts;
7566
+ Slice ts = mock_app_clock_->Allocate(ts_guard.get());
7567
+ s = db->Put(write_options_, key, ts, Slice());
7568
+ } else {
7569
+ s = db->Put(write_options_, key, Slice());
6978
7570
  }
6979
- s = db->Put(write_options_, key, Slice());
6980
7571
  }
6981
7572
 
6982
7573
  if (!s.ok()) {
@@ -6996,7 +7587,6 @@ class Benchmark {
6996
7587
  }
6997
7588
 
6998
7589
  void TimeSeriesReadOrDelete(ThreadState* thread, bool do_deletion) {
6999
- ReadOptions options(FLAGS_verify_checksum, true);
7000
7590
  int64_t read = 0;
7001
7591
  int64_t found = 0;
7002
7592
  int64_t bytes = 0;
@@ -7004,7 +7594,7 @@ class Benchmark {
7004
7594
  Iterator* iter = nullptr;
7005
7595
  // Only work on single database
7006
7596
  assert(db_.db != nullptr);
7007
- iter = db_.db->NewIterator(options);
7597
+ iter = db_.db->NewIterator(read_options_);
7008
7598
 
7009
7599
  std::unique_ptr<const char[]> key_guard;
7010
7600
  Slice key = AllocateKey(&key_guard);
@@ -7020,7 +7610,7 @@ class Benchmark {
7020
7610
  }
7021
7611
  if (!FLAGS_use_tailing_iterator) {
7022
7612
  delete iter;
7023
- iter = db_.db->NewIterator(options);
7613
+ iter = db_.db->NewIterator(read_options_);
7024
7614
  }
7025
7615
  // Pick a Iterator to use
7026
7616
 
@@ -7166,6 +7756,167 @@ class Benchmark {
7166
7756
  }
7167
7757
  }
7168
7758
 
7759
+ #ifndef ROCKSDB_LITE
7760
+ void WaitForCompactionHelper(DBWithColumnFamilies& db) {
7761
+ // This is an imperfect way of waiting for compaction. The loop and sleep
7762
+ // is done because a thread that finishes a compaction job should get a
7763
+ // chance to pickup a new compaction job.
7764
+
7765
+ std::vector<std::string> keys = {DB::Properties::kMemTableFlushPending,
7766
+ DB::Properties::kNumRunningFlushes,
7767
+ DB::Properties::kCompactionPending,
7768
+ DB::Properties::kNumRunningCompactions};
7769
+
7770
+ fprintf(stdout, "waitforcompaction(%s): started\n",
7771
+ db.db->GetName().c_str());
7772
+
7773
+ while (true) {
7774
+ bool retry = false;
7775
+
7776
+ for (const auto& k : keys) {
7777
+ uint64_t v;
7778
+ if (!db.db->GetIntProperty(k, &v)) {
7779
+ fprintf(stderr, "waitforcompaction(%s): GetIntProperty(%s) failed\n",
7780
+ db.db->GetName().c_str(), k.c_str());
7781
+ exit(1);
7782
+ } else if (v > 0) {
7783
+ fprintf(stdout,
7784
+ "waitforcompaction(%s): active(%s). Sleep 10 seconds\n",
7785
+ db.db->GetName().c_str(), k.c_str());
7786
+ FLAGS_env->SleepForMicroseconds(10 * 1000000);
7787
+ retry = true;
7788
+ break;
7789
+ }
7790
+ }
7791
+
7792
+ if (!retry) {
7793
+ fprintf(stdout, "waitforcompaction(%s): finished\n",
7794
+ db.db->GetName().c_str());
7795
+ return;
7796
+ }
7797
+ }
7798
+ }
7799
+
7800
+ void WaitForCompaction() {
7801
+ // Give background threads a chance to wake
7802
+ FLAGS_env->SleepForMicroseconds(5 * 1000000);
7803
+
7804
+ // I am skeptical that this check race free. I hope that checking twice
7805
+ // reduces the chance.
7806
+ if (db_.db != nullptr) {
7807
+ WaitForCompactionHelper(db_);
7808
+ WaitForCompactionHelper(db_);
7809
+ } else {
7810
+ for (auto& db_with_cfh : multi_dbs_) {
7811
+ WaitForCompactionHelper(db_with_cfh);
7812
+ WaitForCompactionHelper(db_with_cfh);
7813
+ }
7814
+ }
7815
+ }
7816
+
7817
+ bool CompactLevelHelper(DBWithColumnFamilies& db_with_cfh, int from_level) {
7818
+ std::vector<LiveFileMetaData> files;
7819
+ db_with_cfh.db->GetLiveFilesMetaData(&files);
7820
+
7821
+ assert(from_level == 0 || from_level == 1);
7822
+
7823
+ int real_from_level = from_level;
7824
+ if (real_from_level > 0) {
7825
+ // With dynamic leveled compaction the first level with data beyond L0
7826
+ // might not be L1.
7827
+ real_from_level = std::numeric_limits<int>::max();
7828
+
7829
+ for (auto& f : files) {
7830
+ if (f.level > 0 && f.level < real_from_level) real_from_level = f.level;
7831
+ }
7832
+
7833
+ if (real_from_level == std::numeric_limits<int>::max()) {
7834
+ fprintf(stdout, "compact%d found 0 files to compact\n", from_level);
7835
+ return true;
7836
+ }
7837
+ }
7838
+
7839
+ // The goal is to compact from from_level to the level that follows it,
7840
+ // and with dynamic leveled compaction the next level might not be
7841
+ // real_from_level+1
7842
+ int next_level = std::numeric_limits<int>::max();
7843
+
7844
+ std::vector<std::string> files_to_compact;
7845
+ for (auto& f : files) {
7846
+ if (f.level == real_from_level)
7847
+ files_to_compact.push_back(f.name);
7848
+ else if (f.level > real_from_level && f.level < next_level)
7849
+ next_level = f.level;
7850
+ }
7851
+
7852
+ if (files_to_compact.empty()) {
7853
+ fprintf(stdout, "compact%d found 0 files to compact\n", from_level);
7854
+ return true;
7855
+ } else if (next_level == std::numeric_limits<int>::max()) {
7856
+ // There is no data beyond real_from_level. So we are done.
7857
+ fprintf(stdout, "compact%d found no data beyond L%d\n", from_level,
7858
+ real_from_level);
7859
+ return true;
7860
+ }
7861
+
7862
+ fprintf(stdout, "compact%d found %d files to compact from L%d to L%d\n",
7863
+ from_level, static_cast<int>(files_to_compact.size()),
7864
+ real_from_level, next_level);
7865
+
7866
+ ROCKSDB_NAMESPACE::CompactionOptions options;
7867
+ // Lets RocksDB use the configured compression for this level
7868
+ options.compression = ROCKSDB_NAMESPACE::kDisableCompressionOption;
7869
+
7870
+ ROCKSDB_NAMESPACE::ColumnFamilyDescriptor cfDesc;
7871
+ db_with_cfh.db->DefaultColumnFamily()->GetDescriptor(&cfDesc);
7872
+ options.output_file_size_limit = cfDesc.options.target_file_size_base;
7873
+
7874
+ Status status =
7875
+ db_with_cfh.db->CompactFiles(options, files_to_compact, next_level);
7876
+ if (!status.ok()) {
7877
+ // This can fail for valid reasons including the operation was aborted
7878
+ // or a filename is invalid because background compaction removed it.
7879
+ // Having read the current cases for which an error is raised I prefer
7880
+ // not to figure out whether an exception should be thrown here.
7881
+ fprintf(stderr, "compact%d CompactFiles failed: %s\n", from_level,
7882
+ status.ToString().c_str());
7883
+ return false;
7884
+ }
7885
+ return true;
7886
+ }
7887
+
7888
+ void CompactLevel(int from_level) {
7889
+ if (db_.db != nullptr) {
7890
+ while (!CompactLevelHelper(db_, from_level)) WaitForCompaction();
7891
+ }
7892
+ for (auto& db_with_cfh : multi_dbs_) {
7893
+ while (!CompactLevelHelper(db_with_cfh, from_level)) WaitForCompaction();
7894
+ }
7895
+ }
7896
+ #endif
7897
+
7898
+ void Flush() {
7899
+ FlushOptions flush_opt;
7900
+ flush_opt.wait = true;
7901
+
7902
+ if (db_.db != nullptr) {
7903
+ Status s = db_.db->Flush(flush_opt, db_.cfh);
7904
+ if (!s.ok()) {
7905
+ fprintf(stderr, "Flush failed: %s\n", s.ToString().c_str());
7906
+ exit(1);
7907
+ }
7908
+ } else {
7909
+ for (const auto& db_with_cfh : multi_dbs_) {
7910
+ Status s = db_with_cfh.db->Flush(flush_opt, db_with_cfh.cfh);
7911
+ if (!s.ok()) {
7912
+ fprintf(stderr, "Flush failed: %s\n", s.ToString().c_str());
7913
+ exit(1);
7914
+ }
7915
+ }
7916
+ }
7917
+ fprintf(stdout, "flush memtable\n");
7918
+ }
7919
+
7169
7920
  void ResetStats() {
7170
7921
  if (db_.db != nullptr) {
7171
7922
  db_.db->ResetStats();
@@ -7228,6 +7979,32 @@ class Benchmark {
7228
7979
  fprintf(stdout, "\n%s\n", stats.c_str());
7229
7980
  }
7230
7981
 
7982
+ void PrintStats(const std::vector<std::string>& keys) {
7983
+ if (db_.db != nullptr) {
7984
+ PrintStats(db_.db, keys);
7985
+ }
7986
+ for (const auto& db_with_cfh : multi_dbs_) {
7987
+ PrintStats(db_with_cfh.db, keys, true);
7988
+ }
7989
+ }
7990
+
7991
+ void PrintStats(DB* db, const std::vector<std::string>& keys,
7992
+ bool print_header = false) {
7993
+ if (print_header) {
7994
+ fprintf(stdout, "\n==== DB: %s ===\n", db->GetName().c_str());
7995
+ }
7996
+
7997
+ for (const auto& key : keys) {
7998
+ std::string stats;
7999
+ if (!db->GetProperty(key, &stats)) {
8000
+ stats = "(failed)";
8001
+ }
8002
+ fprintf(stdout, "%s: %s\n", key.c_str(), stats.c_str());
8003
+ }
8004
+ }
8005
+
8006
+ #ifndef ROCKSDB_LITE
8007
+
7231
8008
  void Replay(ThreadState* thread) {
7232
8009
  if (db_.db != nullptr) {
7233
8010
  Replay(thread, &db_);
@@ -7247,24 +8024,40 @@ class Benchmark {
7247
8024
  s.ToString().c_str());
7248
8025
  exit(1);
7249
8026
  }
7250
- Replayer replayer(db_with_cfh->db, db_with_cfh->cfh,
7251
- std::move(trace_reader));
7252
- replayer.SetFastForward(
7253
- static_cast<uint32_t>(FLAGS_trace_replay_fast_forward));
7254
- s = replayer.MultiThreadReplay(
7255
- static_cast<uint32_t>(FLAGS_trace_replay_threads));
8027
+ std::unique_ptr<Replayer> replayer;
8028
+ s = db_with_cfh->db->NewDefaultReplayer(db_with_cfh->cfh,
8029
+ std::move(trace_reader), &replayer);
8030
+ if (!s.ok()) {
8031
+ fprintf(stderr,
8032
+ "Encountered an error creating a default Replayer. "
8033
+ "Error: %s\n",
8034
+ s.ToString().c_str());
8035
+ exit(1);
8036
+ }
8037
+ s = replayer->Prepare();
8038
+ if (!s.ok()) {
8039
+ fprintf(stderr, "Prepare for replay failed. Error: %s\n",
8040
+ s.ToString().c_str());
8041
+ }
8042
+ s = replayer->Replay(
8043
+ ReplayOptions(static_cast<uint32_t>(FLAGS_trace_replay_threads),
8044
+ FLAGS_trace_replay_fast_forward),
8045
+ nullptr);
8046
+ replayer.reset();
7256
8047
  if (s.ok()) {
7257
- fprintf(stdout, "Replay started from trace_file: %s\n",
8048
+ fprintf(stdout, "Replay completed from trace_file: %s\n",
7258
8049
  FLAGS_trace_file.c_str());
7259
8050
  } else {
7260
- fprintf(stderr, "Starting replay failed. Error: %s\n",
7261
- s.ToString().c_str());
8051
+ fprintf(stderr, "Replay failed. Error: %s\n", s.ToString().c_str());
7262
8052
  }
7263
8053
  }
8054
+
8055
+ #endif // ROCKSDB_LITE
7264
8056
  };
7265
8057
 
7266
8058
  int db_bench_tool(int argc, char** argv) {
7267
8059
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
8060
+ ConfigOptions config_options;
7268
8061
  static bool initialized = false;
7269
8062
  if (!initialized) {
7270
8063
  SetUsageMessage(std::string("\nUSAGE:\n") + std::string(argv[0]) +
@@ -7281,8 +8074,8 @@ int db_bench_tool(int argc, char** argv) {
7281
8074
  exit(1);
7282
8075
  }
7283
8076
  if (!FLAGS_statistics_string.empty()) {
7284
- Status s = ObjectRegistry::NewInstance()->NewSharedObject<Statistics>(
7285
- FLAGS_statistics_string, &dbstats);
8077
+ Status s = Statistics::CreateFromString(config_options,
8078
+ FLAGS_statistics_string, &dbstats);
7286
8079
  if (dbstats == nullptr) {
7287
8080
  fprintf(stderr,
7288
8081
  "No Statistics registered matching string: %s status=%s\n",
@@ -7314,34 +8107,55 @@ int db_bench_tool(int argc, char** argv) {
7314
8107
  FLAGS_compression_type_e =
7315
8108
  StringToCompressionType(FLAGS_compression_type.c_str());
7316
8109
 
8110
+ FLAGS_wal_compression_e =
8111
+ StringToCompressionType(FLAGS_wal_compression.c_str());
8112
+
8113
+ FLAGS_compressed_secondary_cache_compression_type_e = StringToCompressionType(
8114
+ FLAGS_compressed_secondary_cache_compression_type.c_str());
8115
+
7317
8116
  #ifndef ROCKSDB_LITE
8117
+ // Stacked BlobDB
7318
8118
  FLAGS_blob_db_compression_type_e =
7319
8119
  StringToCompressionType(FLAGS_blob_db_compression_type.c_str());
7320
8120
 
7321
- int env_opts =
7322
- !FLAGS_hdfs.empty() + !FLAGS_env_uri.empty() + !FLAGS_fs_uri.empty();
8121
+ int env_opts = !FLAGS_env_uri.empty() + !FLAGS_fs_uri.empty();
7323
8122
  if (env_opts > 1) {
7324
- fprintf(stderr,
7325
- "Error: --hdfs, --env_uri and --fs_uri are mutually exclusive\n");
8123
+ fprintf(stderr, "Error: --env_uri and --fs_uri are mutually exclusive\n");
7326
8124
  exit(1);
7327
8125
  }
7328
8126
 
7329
- if (!FLAGS_env_uri.empty()) {
7330
- Status s = Env::LoadEnv(FLAGS_env_uri, &FLAGS_env, &env_guard);
7331
- if (FLAGS_env == nullptr) {
7332
- fprintf(stderr, "No Env registered for URI: %s\n", FLAGS_env_uri.c_str());
7333
- exit(1);
7334
- }
7335
- } else if (!FLAGS_fs_uri.empty()) {
7336
- std::shared_ptr<FileSystem> fs;
7337
- Status s = FileSystem::Load(FLAGS_fs_uri, &fs);
7338
- if (fs == nullptr) {
7339
- fprintf(stderr, "Error: %s\n", s.ToString().c_str());
8127
+ if (env_opts == 1) {
8128
+ Status s = Env::CreateFromUri(config_options, FLAGS_env_uri, FLAGS_fs_uri,
8129
+ &FLAGS_env, &env_guard);
8130
+ if (!s.ok()) {
8131
+ fprintf(stderr, "Failed creating env: %s\n", s.ToString().c_str());
7340
8132
  exit(1);
7341
8133
  }
7342
- FLAGS_env = GetCompositeEnv(fs);
8134
+ } else if (FLAGS_simulate_hdd || FLAGS_simulate_hybrid_fs_file != "") {
8135
+ //**TODO: Make the simulate fs something that can be loaded
8136
+ // from the ObjectRegistry...
8137
+ static std::shared_ptr<ROCKSDB_NAMESPACE::Env> composite_env =
8138
+ NewCompositeEnv(std::make_shared<SimulatedHybridFileSystem>(
8139
+ FileSystem::Default(), FLAGS_simulate_hybrid_fs_file,
8140
+ /*throughput_multiplier=*/
8141
+ int{FLAGS_simulate_hybrid_hdd_multipliers},
8142
+ /*is_full_fs_warm=*/FLAGS_simulate_hdd));
8143
+ FLAGS_env = composite_env.get();
7343
8144
  }
8145
+
8146
+ // Let -readonly imply -use_existing_db
8147
+ FLAGS_use_existing_db |= FLAGS_readonly;
7344
8148
  #endif // ROCKSDB_LITE
8149
+
8150
+ if (!FLAGS_seed) {
8151
+ uint64_t now = FLAGS_env->GetSystemClock()->NowMicros();
8152
+ seed_base = static_cast<int64_t>(now);
8153
+ fprintf(stdout, "Set seed to %" PRIu64 " because --seed was 0\n",
8154
+ seed_base);
8155
+ } else {
8156
+ seed_base = FLAGS_seed;
8157
+ }
8158
+
7345
8159
  if (FLAGS_use_existing_keys && !FLAGS_use_existing_db) {
7346
8160
  fprintf(stderr,
7347
8161
  "`-use_existing_db` must be true for `-use_existing_keys` to be "
@@ -7349,10 +8163,6 @@ int db_bench_tool(int argc, char** argv) {
7349
8163
  exit(1);
7350
8164
  }
7351
8165
 
7352
- if (!FLAGS_hdfs.empty()) {
7353
- FLAGS_env = new ROCKSDB_NAMESPACE::HdfsEnv(FLAGS_hdfs);
7354
- }
7355
-
7356
8166
  if (!strcasecmp(FLAGS_compaction_fadvice.c_str(), "NONE"))
7357
8167
  FLAGS_compaction_fadvice_e = ROCKSDB_NAMESPACE::Options::NONE;
7358
8168
  else if (!strcasecmp(FLAGS_compaction_fadvice.c_str(), "NORMAL"))
@@ -7364,13 +8174,12 @@ int db_bench_tool(int argc, char** argv) {
7364
8174
  else {
7365
8175
  fprintf(stdout, "Unknown compaction fadvice:%s\n",
7366
8176
  FLAGS_compaction_fadvice.c_str());
8177
+ exit(1);
7367
8178
  }
7368
8179
 
7369
8180
  FLAGS_value_size_distribution_type_e =
7370
8181
  StringToDistributionType(FLAGS_value_size_distribution_type.c_str());
7371
8182
 
7372
- FLAGS_rep_factory = StringToRepFactory(FLAGS_memtablerep.c_str());
7373
-
7374
8183
  // Note options sanitization may increase thread pool sizes according to
7375
8184
  // max_background_flushes/max_background_compactions/max_background_jobs
7376
8185
  FLAGS_env->SetBackgroundThreads(FLAGS_num_high_pri_threads,