@nxtedition/rocksdb 5.2.21 → 5.2.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/binding.cc +510 -967
- package/binding.gyp +78 -72
- package/chained-batch.js +1 -2
- package/deps/rocksdb/build_version.cc +70 -4
- package/deps/rocksdb/rocksdb/CMakeLists.txt +281 -149
- package/deps/rocksdb/rocksdb/Makefile +459 -469
- package/deps/rocksdb/rocksdb/TARGETS +5244 -1500
- package/deps/rocksdb/rocksdb/cache/cache.cc +12 -3
- package/deps/rocksdb/rocksdb/cache/cache_bench.cc +7 -368
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +924 -0
- package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +128 -0
- package/deps/rocksdb/rocksdb/cache/cache_entry_roles.h +103 -0
- package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +183 -0
- package/deps/rocksdb/rocksdb/cache/cache_helpers.h +11 -0
- package/deps/rocksdb/rocksdb/cache/cache_key.cc +344 -0
- package/deps/rocksdb/rocksdb/cache/cache_key.h +132 -0
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +183 -0
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +288 -0
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +468 -0
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +85 -8
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +121 -51
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +171 -0
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +86 -0
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +607 -0
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +381 -154
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +176 -33
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1659 -3
- package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +94 -23
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +49 -28
- package/deps/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake +7 -0
- package/deps/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake +29 -0
- package/deps/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake +29 -0
- package/deps/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake +29 -0
- package/deps/rocksdb/rocksdb/cmake/modules/FindTBB.cmake +33 -0
- package/deps/rocksdb/rocksdb/cmake/modules/Findgflags.cmake +29 -0
- package/deps/rocksdb/rocksdb/cmake/modules/Findlz4.cmake +29 -0
- package/deps/rocksdb/rocksdb/cmake/modules/Finduring.cmake +26 -0
- package/deps/rocksdb/rocksdb/cmake/modules/Findzstd.cmake +29 -0
- package/deps/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake +10 -0
- package/deps/rocksdb/rocksdb/crash_test.mk +93 -0
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +54 -31
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +10 -6
- package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +146 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator_test.cc +326 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_fetcher.cc +34 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_fetcher.h +37 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_addition.cc +4 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc +8 -4
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +99 -40
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +20 -8
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +95 -83
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +13 -10
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +7 -4
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +37 -37
- package/deps/rocksdb/rocksdb/db/blob/blob_file_completion_callback.h +101 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.cc +8 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.h +6 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +209 -44
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +37 -11
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +382 -179
- package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter.cc +100 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter.h +102 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter_test.cc +196 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_index.h +3 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_log_format.h +2 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +7 -5
- package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.h +10 -3
- package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +12 -8
- package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.h +5 -5
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +772 -9
- package/deps/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc +730 -0
- package/deps/rocksdb/rocksdb/db/blob/db_blob_corruption_test.cc +82 -0
- package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +155 -17
- package/deps/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.cc +21 -0
- package/deps/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.h +38 -0
- package/deps/rocksdb/rocksdb/db/builder.cc +137 -89
- package/deps/rocksdb/rocksdb/db/builder.h +16 -37
- package/deps/rocksdb/rocksdb/db/c.cc +413 -208
- package/deps/rocksdb/rocksdb/db/c_test.c +227 -138
- package/deps/rocksdb/rocksdb/db/column_family.cc +118 -103
- package/deps/rocksdb/rocksdb/db/column_family.h +86 -44
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +38 -24
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +81 -0
- package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +275 -0
- package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator_test.cc +258 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +81 -28
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +43 -12
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h +12 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +406 -215
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +147 -50
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +167 -61
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +1321 -156
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +197 -28
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +246 -43
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +65 -26
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +7 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +122 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +18 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +536 -44
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +311 -30
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +849 -0
- package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +92 -0
- package/deps/rocksdb/rocksdb/db/compaction/sst_partitioner.cc +46 -0
- package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/convenience.cc +6 -3
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +383 -28
- package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +7 -2
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +154 -45
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +1095 -33
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +1249 -203
- package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +135 -9
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +1348 -166
- package/deps/rocksdb/rocksdb/db/db_dynamic_level_test.cc +3 -5
- package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +312 -45
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +1734 -48
- package/deps/rocksdb/rocksdb/db/{compacted_db_impl.cc → db_impl/compacted_db_impl.cc} +24 -7
- package/deps/rocksdb/rocksdb/db/{compacted_db_impl.h → db_impl/compacted_db_impl.h} +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +644 -333
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +365 -92
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +578 -210
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +38 -16
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +17 -10
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +75 -74
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +450 -183
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +42 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +232 -15
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +42 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +297 -100
- package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +16 -15
- package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +31 -1
- package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +6 -5
- package/deps/rocksdb/rocksdb/db/db_iter.cc +218 -153
- package/deps/rocksdb/rocksdb/db/db_iter.h +14 -12
- package/deps/rocksdb/rocksdb/db/db_iter_stress_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_iter_test.cc +84 -160
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +47 -6
- package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +204 -0
- package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +21 -13
- package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +17 -10
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +38 -24
- package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +184 -19
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +183 -3
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +409 -9
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +92 -23
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +446 -0
- package/deps/rocksdb/rocksdb/db/{db_impl/db_secondary_test.cc → db_secondary_test.cc} +363 -35
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +520 -15
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +50 -1
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +139 -4
- package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_test.cc +669 -359
- package/deps/rocksdb/rocksdb/db/db_test2.cc +2110 -304
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +76 -43
- package/deps/rocksdb/rocksdb/db/db_test_util.h +231 -103
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +19 -11
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +490 -71
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +980 -349
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +11 -12
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +793 -0
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/dbformat.cc +4 -12
- package/deps/rocksdb/rocksdb/db/dbformat.h +28 -18
- package/deps/rocksdb/rocksdb/db/dbformat_test.cc +3 -0
- package/deps/rocksdb/rocksdb/db/deletefile_test.cc +50 -15
- package/deps/rocksdb/rocksdb/db/error_handler.cc +127 -41
- package/deps/rocksdb/rocksdb/db/error_handler.h +12 -5
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +524 -255
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +136 -11
- package/deps/rocksdb/rocksdb/db/event_helpers.h +27 -2
- package/deps/rocksdb/rocksdb/db/experimental.cc +100 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +307 -4
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +137 -60
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +12 -8
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -55
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +86 -5
- package/deps/rocksdb/rocksdb/db/filename_test.cc +63 -0
- package/deps/rocksdb/rocksdb/db/flush_job.cc +619 -64
- package/deps/rocksdb/rocksdb/db/flush_job.h +30 -7
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +33 -16
- package/deps/rocksdb/rocksdb/db/flush_scheduler.h +2 -1
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +18 -17
- package/deps/rocksdb/rocksdb/db/forward_iterator.h +5 -4
- package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +0 -1
- package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +91 -0
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +25 -14
- package/deps/rocksdb/rocksdb/db/import_column_family_job.h +6 -5
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +471 -50
- package/deps/rocksdb/rocksdb/db/internal_stats.h +129 -25
- package/deps/rocksdb/rocksdb/db/job_context.h +22 -9
- package/deps/rocksdb/rocksdb/db/kv_checksum.h +394 -0
- package/deps/rocksdb/rocksdb/db/listener_test.cc +518 -41
- package/deps/rocksdb/rocksdb/db/log_format.h +4 -1
- package/deps/rocksdb/rocksdb/db/log_reader.cc +129 -6
- package/deps/rocksdb/rocksdb/db/log_reader.h +17 -1
- package/deps/rocksdb/rocksdb/db/log_test.cc +161 -11
- package/deps/rocksdb/rocksdb/db/log_writer.cc +92 -13
- package/deps/rocksdb/rocksdb/db/log_writer.h +18 -5
- package/deps/rocksdb/rocksdb/db/logs_with_prep_tracker.h +1 -1
- package/deps/rocksdb/rocksdb/db/lookup_key.h +0 -1
- package/deps/rocksdb/rocksdb/db/malloc_stats.cc +2 -2
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +21 -8
- package/deps/rocksdb/rocksdb/db/memtable.cc +144 -54
- package/deps/rocksdb/rocksdb/db/memtable.h +72 -15
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +95 -47
- package/deps/rocksdb/rocksdb/db/memtable_list.h +33 -13
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +61 -31
- package/deps/rocksdb/rocksdb/db/merge_context.h +20 -8
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +54 -11
- package/deps/rocksdb/rocksdb/db/merge_helper.h +17 -6
- package/deps/rocksdb/rocksdb/db/merge_helper_test.cc +13 -7
- package/deps/rocksdb/rocksdb/db/merge_test.cc +40 -19
- package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +14 -25
- package/deps/rocksdb/rocksdb/db/output_validator.cc +3 -0
- package/deps/rocksdb/rocksdb/db/output_validator.h +5 -4
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +32 -28
- package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +43 -29
- package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +9 -7
- package/deps/rocksdb/rocksdb/db/periodic_work_scheduler_test.cc +21 -16
- package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +1 -1
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +29 -36
- package/deps/rocksdb/rocksdb/db/pre_release_callback.h +1 -2
- package/deps/rocksdb/rocksdb/db/prefix_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +2 -2
- package/deps/rocksdb/rocksdb/db/range_del_aggregator_bench.cc +11 -11
- package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +3 -2
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +14 -8
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +17 -0
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +4 -2
- package/deps/rocksdb/rocksdb/db/read_callback.h +1 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +87 -58
- package/deps/rocksdb/rocksdb/db/repair_test.cc +35 -5
- package/deps/rocksdb/rocksdb/db/snapshot_impl.h +2 -1
- package/deps/rocksdb/rocksdb/db/table_cache.cc +95 -69
- package/deps/rocksdb/rocksdb/db/table_cache.h +63 -53
- package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +4 -4
- package/deps/rocksdb/rocksdb/db/table_properties_collector.h +78 -10
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +28 -33
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +30 -51
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +12 -8
- package/deps/rocksdb/rocksdb/db/version_builder.cc +564 -341
- package/deps/rocksdb/rocksdb/db/version_builder.h +8 -8
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +327 -155
- package/deps/rocksdb/rocksdb/db/version_edit.cc +89 -27
- package/deps/rocksdb/rocksdb/db/version_edit.h +42 -17
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +324 -43
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +79 -22
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +165 -20
- package/deps/rocksdb/rocksdb/db/version_set.cc +935 -1034
- package/deps/rocksdb/rocksdb/db/version_set.h +183 -122
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +556 -138
- package/deps/rocksdb/rocksdb/db/version_util.h +68 -0
- package/deps/rocksdb/rocksdb/db/wal_manager.cc +23 -21
- package/deps/rocksdb/rocksdb/db/wal_manager.h +5 -2
- package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +30 -27
- package/deps/rocksdb/rocksdb/db/write_batch.cc +704 -209
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +135 -2
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +209 -5
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/write_controller.cc +47 -54
- package/deps/rocksdb/rocksdb/db/write_controller.h +12 -9
- package/deps/rocksdb/rocksdb/db/write_controller_test.cc +215 -103
- package/deps/rocksdb/rocksdb/db/write_thread.cc +11 -0
- package/deps/rocksdb/rocksdb/db/write_thread.h +14 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +7 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +10 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +6 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +78 -25
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +13 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +29 -12
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +5 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +199 -32
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.cc +188 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +59 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +77 -109
- package/deps/rocksdb/rocksdb/{third-party/folly/folly/synchronization/WaitOptions.cpp → db_stress_tool/db_stress_stat.cc} +9 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +7 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_table_properties_collector.h +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +699 -143
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +20 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +49 -39
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +631 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +287 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +1565 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +374 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +149 -18
- package/deps/rocksdb/rocksdb/env/composite_env.cc +464 -0
- package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +98 -646
- package/deps/rocksdb/rocksdb/env/emulated_clock.h +114 -0
- package/deps/rocksdb/rocksdb/env/env.cc +632 -42
- package/deps/rocksdb/rocksdb/env/env_basic_test.cc +84 -36
- package/deps/rocksdb/rocksdb/env/env_chroot.cc +88 -286
- package/deps/rocksdb/rocksdb/env/env_chroot.h +34 -1
- package/deps/rocksdb/rocksdb/env/env_encryption.cc +469 -277
- package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +9 -30
- package/deps/rocksdb/rocksdb/env/env_posix.cc +110 -119
- package/deps/rocksdb/rocksdb/env/env_test.cc +1128 -39
- package/deps/rocksdb/rocksdb/env/file_system.cc +147 -8
- package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +207 -136
- package/deps/rocksdb/rocksdb/env/file_system_tracer.h +86 -54
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +192 -64
- package/deps/rocksdb/rocksdb/env/fs_readonly.h +107 -0
- package/deps/rocksdb/rocksdb/env/fs_remap.cc +339 -0
- package/deps/rocksdb/rocksdb/env/fs_remap.h +139 -0
- package/deps/rocksdb/rocksdb/env/io_posix.cc +245 -41
- package/deps/rocksdb/rocksdb/env/io_posix.h +66 -1
- package/deps/rocksdb/rocksdb/env/mock_env.cc +147 -149
- package/deps/rocksdb/rocksdb/env/mock_env.h +113 -11
- package/deps/rocksdb/rocksdb/env/mock_env_test.cc +2 -4
- package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +164 -0
- package/deps/rocksdb/rocksdb/env/unique_id_gen.h +71 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +9 -5
- package/deps/rocksdb/rocksdb/file/delete_scheduler.h +6 -4
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +19 -12
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +459 -70
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +205 -28
- package/deps/rocksdb/rocksdb/file/file_util.cc +39 -28
- package/deps/rocksdb/rocksdb/file/file_util.h +18 -27
- package/deps/rocksdb/rocksdb/file/filename.cc +59 -22
- package/deps/rocksdb/rocksdb/file/filename.h +13 -8
- package/deps/rocksdb/rocksdb/file/line_file_reader.cc +68 -0
- package/deps/rocksdb/rocksdb/file/line_file_reader.h +59 -0
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +1130 -6
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +220 -36
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +69 -17
- package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +13 -12
- package/deps/rocksdb/rocksdb/file/read_write_util.cc +3 -38
- package/deps/rocksdb/rocksdb/file/read_write_util.h +0 -4
- package/deps/rocksdb/rocksdb/file/readahead_file_info.h +33 -0
- package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +57 -9
- package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +58 -6
- package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +29 -54
- package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +22 -29
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +424 -50
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +66 -19
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +157 -66
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +224 -121
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +333 -30
- package/deps/rocksdb/rocksdb/include/rocksdb/cache_bench_tool.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/cleanable.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +90 -50
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +13 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +20 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/concurrent_task_limiter.h +8 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +53 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +31 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/customizable.h +102 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +51 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +370 -262
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +286 -87
- package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +124 -64
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +27 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +21 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +384 -41
- package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +111 -143
- package/deps/rocksdb/rocksdb/include/rocksdb/flush_block_policy.h +20 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +56 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/io_status.h +15 -33
- package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +37 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +314 -26
- package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +11 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +50 -15
- package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +10 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +186 -96
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +373 -103
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +13 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +37 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/rocksdb_namespace.h +6 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +87 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +5 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +59 -30
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +11 -11
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +22 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_partitioner.h +17 -10
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +121 -41
- package/deps/rocksdb/rocksdb/include/rocksdb/stats_history.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +114 -136
- package/deps/rocksdb/rocksdb/include/rocksdb/system_clock.h +116 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +160 -18
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +57 -15
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +3 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/trace_reader_writer.h +10 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/trace_record.h +247 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/trace_record_result.h +187 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +14 -24
- package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +46 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +14 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/agg_merge.h +138 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +631 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +142 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +12 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/customizable_util.h +368 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/object_registry.h +418 -63
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +143 -73
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/replayer.h +87 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/sim_cache.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +43 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +18 -23
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +26 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +32 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db_mutex.h +1 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +20 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +30 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/wal_filter.h +11 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +89 -11
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +11 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +108 -38
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +40 -23
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger.h +12 -5
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +100 -49
- package/deps/rocksdb/rocksdb/logging/env_logger.h +7 -5
- package/deps/rocksdb/rocksdb/logging/env_logger_test.cc +0 -1
- package/deps/rocksdb/rocksdb/logging/posix_logger.h +3 -9
- package/deps/rocksdb/rocksdb/memory/arena.cc +3 -1
- package/deps/rocksdb/rocksdb/memory/arena.h +1 -1
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +171 -106
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +31 -15
- package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.cc +15 -4
- package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.h +24 -8
- package/deps/rocksdb/rocksdb/memory/memory_allocator.cc +91 -0
- package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +239 -0
- package/deps/rocksdb/rocksdb/memory/memory_usage.h +14 -1
- package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.cc +72 -9
- package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.cc +52 -6
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +53 -0
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +5 -5
- package/deps/rocksdb/rocksdb/memtable/memtablerep_bench.cc +17 -5
- package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -1
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +87 -0
- package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +20 -10
- package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +148 -94
- package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +160 -62
- package/deps/rocksdb/rocksdb/microbench/CMakeLists.txt +17 -0
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +1360 -0
- package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +153 -0
- package/deps/rocksdb/rocksdb/monitoring/histogram.cc +8 -15
- package/deps/rocksdb/rocksdb/monitoring/histogram.h +0 -1
- package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +18 -16
- package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.cc +9 -7
- package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.h +5 -3
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +7 -5
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +37 -12
- package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +26 -6
- package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +6 -10
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +14 -13
- package/deps/rocksdb/rocksdb/monitoring/perf_context_imp.h +19 -20
- package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +18 -18
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +84 -2
- package/deps/rocksdb/rocksdb/monitoring/statistics.h +6 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics_test.cc +47 -2
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +67 -54
- package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +4 -1
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +2 -1
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -2
- package/deps/rocksdb/rocksdb/options/cf_options.cc +280 -212
- package/deps/rocksdb/rocksdb/options/cf_options.h +51 -57
- package/deps/rocksdb/rocksdb/options/configurable.cc +242 -138
- package/deps/rocksdb/rocksdb/options/configurable_helper.h +4 -68
- package/deps/rocksdb/rocksdb/options/configurable_test.cc +144 -21
- package/deps/rocksdb/rocksdb/options/configurable_test.h +2 -3
- package/deps/rocksdb/rocksdb/options/customizable.cc +67 -7
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +1773 -151
- package/deps/rocksdb/rocksdb/options/db_options.cc +275 -47
- package/deps/rocksdb/rocksdb/options/db_options.h +36 -7
- package/deps/rocksdb/rocksdb/options/options.cc +49 -17
- package/deps/rocksdb/rocksdb/options/options_helper.cc +369 -352
- package/deps/rocksdb/rocksdb/options/options_helper.h +23 -23
- package/deps/rocksdb/rocksdb/options/options_parser.cc +18 -13
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +67 -54
- package/deps/rocksdb/rocksdb/options/options_test.cc +1162 -187
- package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +1 -1
- package/deps/rocksdb/rocksdb/port/lang.h +52 -0
- package/deps/rocksdb/rocksdb/port/port_example.h +1 -1
- package/deps/rocksdb/rocksdb/port/port_posix.cc +31 -2
- package/deps/rocksdb/rocksdb/port/port_posix.h +20 -2
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +20 -4
- package/deps/rocksdb/rocksdb/port/sys_time.h +2 -2
- package/deps/rocksdb/rocksdb/port/win/env_default.cc +7 -7
- package/deps/rocksdb/rocksdb/port/win/env_win.cc +44 -74
- package/deps/rocksdb/rocksdb/port/win/env_win.h +25 -23
- package/deps/rocksdb/rocksdb/port/win/io_win.cc +32 -34
- package/deps/rocksdb/rocksdb/port/win/io_win.h +12 -6
- package/deps/rocksdb/rocksdb/port/win/port_win.cc +55 -35
- package/deps/rocksdb/rocksdb/port/win/port_win.h +22 -5
- package/deps/rocksdb/rocksdb/port/win/win_logger.cc +3 -3
- package/deps/rocksdb/rocksdb/port/win/win_logger.h +3 -5
- package/deps/rocksdb/rocksdb/port/win/win_thread.cc +7 -1
- package/deps/rocksdb/rocksdb/port/win/win_thread.h +12 -17
- package/deps/rocksdb/rocksdb/python.mk +9 -0
- package/deps/rocksdb/rocksdb/src.mk +82 -34
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -4
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +158 -80
- package/deps/rocksdb/rocksdb/table/block_based/block.h +64 -36
- package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +23 -14
- package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.h +13 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc +3 -218
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +603 -328
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +28 -22
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +220 -82
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +8 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +3 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +28 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +598 -492
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +151 -96
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +31 -58
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +330 -92
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +50 -19
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +23 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +226 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +56 -22
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +42 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_type.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +34 -20
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +9 -10
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +26 -3
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +844 -202
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +281 -81
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +62 -2
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.h +2 -3
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -7
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +22 -6
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +28 -26
- package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +11 -4
- package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +68 -26
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +44 -9
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +12 -10
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +3 -4
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.h +23 -4
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +44 -19
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +5 -1
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +16 -28
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +7 -4
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +2 -2
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +77 -57
- package/deps/rocksdb/rocksdb/table/block_fetcher.h +23 -12
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +43 -56
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +8 -8
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.h +2 -1
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +52 -70
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.cc +5 -8
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +1 -1
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +17 -11
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +2 -3
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +42 -51
- package/deps/rocksdb/rocksdb/table/format.cc +258 -104
- package/deps/rocksdb/rocksdb/table/format.h +120 -109
- package/deps/rocksdb/rocksdb/table/get_context.cc +97 -65
- package/deps/rocksdb/rocksdb/table/get_context.h +19 -12
- package/deps/rocksdb/rocksdb/table/internal_iterator.h +14 -0
- package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +8 -0
- package/deps/rocksdb/rocksdb/table/merger_test.cc +3 -2
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +11 -21
- package/deps/rocksdb/rocksdb/table/merging_iterator.h +3 -3
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +176 -171
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +47 -33
- package/deps/rocksdb/rocksdb/table/mock_table.cc +7 -9
- package/deps/rocksdb/rocksdb/table/mock_table.h +3 -2
- package/deps/rocksdb/rocksdb/table/multiget_context.h +15 -8
- package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +22 -29
- package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +6 -3
- package/deps/rocksdb/rocksdb/table/plain/plain_table_bloom.h +5 -8
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +29 -26
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +12 -16
- package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.cc +145 -69
- package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +1 -1
- package/deps/rocksdb/rocksdb/table/plain/plain_table_index.cc +7 -6
- package/deps/rocksdb/rocksdb/table/plain/plain_table_index.h +3 -4
- package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +3 -1
- package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.h +1 -1
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +13 -18
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +4 -9
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +55 -37
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +10 -5
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +11 -8
- package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +222 -16
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +106 -58
- package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +6 -5
- package/deps/rocksdb/rocksdb/table/table_builder.h +68 -44
- package/deps/rocksdb/rocksdb/table/table_factory.cc +37 -10
- package/deps/rocksdb/rocksdb/table/table_properties.cc +109 -54
- package/deps/rocksdb/rocksdb/table/table_properties_internal.h +4 -20
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +33 -32
- package/deps/rocksdb/rocksdb/table/table_reader_caller.h +2 -0
- package/deps/rocksdb/rocksdb/table/table_test.cc +989 -326
- package/deps/rocksdb/rocksdb/table/two_level_iterator.cc +4 -0
- package/deps/rocksdb/rocksdb/table/unique_id.cc +166 -0
- package/deps/rocksdb/rocksdb/table/unique_id_impl.h +59 -0
- package/deps/rocksdb/rocksdb/test_util/mock_time_env.cc +1 -1
- package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +13 -10
- package/deps/rocksdb/rocksdb/test_util/sync_point.cc +1 -2
- package/deps/rocksdb/rocksdb/test_util/sync_point.h +35 -16
- package/deps/rocksdb/rocksdb/test_util/sync_point_impl.cc +32 -10
- package/deps/rocksdb/rocksdb/test_util/sync_point_impl.h +31 -4
- package/deps/rocksdb/rocksdb/test_util/testharness.cc +53 -1
- package/deps/rocksdb/rocksdb/test_util/testharness.h +67 -3
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +236 -66
- package/deps/rocksdb/rocksdb/test_util/testutil.h +63 -100
- package/deps/rocksdb/rocksdb/test_util/transaction_test_util.cc +12 -1
- package/deps/rocksdb/rocksdb/tools/blob_dump.cc +2 -2
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.cc +6 -3
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.h +1 -0
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +9 -3
- package/deps/rocksdb/rocksdb/tools/db_bench.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +1420 -611
- package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +11 -8
- package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +11 -1
- package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +4 -2
- package/deps/rocksdb/rocksdb/tools/io_tracer_parser_tool.cc +46 -22
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +655 -179
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +58 -6
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +472 -29
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +23 -2
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +2 -2
- package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.cc +246 -0
- package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.h +126 -0
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +83 -29
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +38 -17
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +191 -55
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +219 -296
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.h +87 -53
- package/deps/rocksdb/rocksdb/tools/write_stress.cc +8 -7
- package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +6 -5
- package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +5 -4
- package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer_test.cc +14 -9
- package/deps/rocksdb/rocksdb/trace_replay/io_tracer.cc +134 -60
- package/deps/rocksdb/rocksdb/trace_replay/io_tracer.h +49 -38
- package/deps/rocksdb/rocksdb/trace_replay/io_tracer_test.cc +152 -15
- package/deps/rocksdb/rocksdb/trace_replay/trace_record.cc +206 -0
- package/deps/rocksdb/rocksdb/trace_replay/trace_record_handler.cc +190 -0
- package/deps/rocksdb/rocksdb/trace_replay/trace_record_handler.h +46 -0
- package/deps/rocksdb/rocksdb/trace_replay/trace_record_result.cc +146 -0
- package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +475 -344
- package/deps/rocksdb/rocksdb/trace_replay/trace_replay.h +83 -95
- package/deps/rocksdb/rocksdb/util/autovector.h +38 -18
- package/deps/rocksdb/rocksdb/util/autovector_test.cc +1 -1
- package/deps/rocksdb/rocksdb/util/bloom_impl.h +4 -0
- package/deps/rocksdb/rocksdb/util/bloom_test.cc +276 -94
- package/deps/rocksdb/rocksdb/util/build_version.cc.in +81 -4
- package/deps/rocksdb/rocksdb/util/cast_util.h +22 -0
- package/deps/rocksdb/rocksdb/util/channel.h +2 -0
- package/deps/rocksdb/rocksdb/util/coding.h +1 -33
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +8 -0
- package/deps/rocksdb/rocksdb/util/comparator.cc +163 -3
- package/deps/rocksdb/rocksdb/util/compression.cc +122 -0
- package/deps/rocksdb/rocksdb/util/compression.h +212 -7
- package/deps/rocksdb/rocksdb/util/compression_context_cache.cc +1 -3
- package/deps/rocksdb/rocksdb/util/crc32c.cc +165 -2
- package/deps/rocksdb/rocksdb/util/crc32c.h +6 -0
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +14 -0
- package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +3 -0
- package/deps/rocksdb/rocksdb/util/crc32c_test.cc +47 -0
- package/deps/rocksdb/rocksdb/util/defer.h +30 -1
- package/deps/rocksdb/rocksdb/util/defer_test.cc +11 -0
- package/deps/rocksdb/rocksdb/util/duplicate_detector.h +3 -1
- package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +3 -3
- package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +5 -4
- package/deps/rocksdb/rocksdb/util/fastrange.h +2 -0
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +36 -0
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +3 -1
- package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +512 -52
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +65 -10
- package/deps/rocksdb/rocksdb/util/gflags_compat.h +6 -1
- package/deps/rocksdb/rocksdb/util/hash.cc +121 -3
- package/deps/rocksdb/rocksdb/util/hash.h +31 -1
- package/deps/rocksdb/rocksdb/util/hash128.h +26 -0
- package/deps/rocksdb/rocksdb/util/hash_containers.h +51 -0
- package/deps/rocksdb/rocksdb/util/hash_test.cc +194 -2
- package/deps/rocksdb/rocksdb/util/heap.h +6 -1
- package/deps/rocksdb/rocksdb/util/kv_map.h +1 -1
- package/deps/rocksdb/rocksdb/util/log_write_bench.cc +8 -6
- package/deps/rocksdb/rocksdb/util/math.h +74 -7
- package/deps/rocksdb/rocksdb/util/math128.h +13 -1
- package/deps/rocksdb/rocksdb/util/murmurhash.h +3 -3
- package/deps/rocksdb/rocksdb/util/random.cc +9 -0
- package/deps/rocksdb/rocksdb/util/random.h +6 -0
- package/deps/rocksdb/rocksdb/util/rate_limiter.cc +298 -144
- package/deps/rocksdb/rocksdb/util/rate_limiter.h +68 -19
- package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +335 -23
- package/deps/rocksdb/rocksdb/util/repeatable_thread.h +10 -12
- package/deps/rocksdb/rocksdb/util/repeatable_thread_test.cc +18 -15
- package/deps/rocksdb/rocksdb/util/ribbon_alg.h +98 -74
- package/deps/rocksdb/rocksdb/util/ribbon_config.cc +506 -0
- package/deps/rocksdb/rocksdb/util/ribbon_config.h +182 -0
- package/deps/rocksdb/rocksdb/util/ribbon_impl.h +154 -79
- package/deps/rocksdb/rocksdb/util/ribbon_test.cc +742 -365
- package/deps/rocksdb/rocksdb/util/set_comparator.h +2 -0
- package/deps/rocksdb/rocksdb/util/slice.cc +198 -35
- package/deps/rocksdb/rocksdb/util/slice_test.cc +30 -1
- package/deps/rocksdb/rocksdb/util/status.cc +32 -29
- package/deps/rocksdb/rocksdb/util/stop_watch.h +18 -18
- package/deps/rocksdb/rocksdb/util/string_util.cc +85 -6
- package/deps/rocksdb/rocksdb/util/string_util.h +47 -2
- package/deps/rocksdb/rocksdb/util/thread_guard.h +41 -0
- package/deps/rocksdb/rocksdb/util/thread_local.h +2 -2
- package/deps/rocksdb/rocksdb/util/thread_local_test.cc +22 -24
- package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +7 -6
- package/deps/rocksdb/rocksdb/util/timer.h +55 -46
- package/deps/rocksdb/rocksdb/util/timer_test.cc +50 -48
- package/deps/rocksdb/rocksdb/util/user_comparator_wrapper.h +4 -0
- package/deps/rocksdb/rocksdb/util/vector_iterator.h +31 -15
- package/deps/rocksdb/rocksdb/util/work_queue.h +2 -0
- package/deps/rocksdb/rocksdb/util/xxhash.cc +35 -1144
- package/deps/rocksdb/rocksdb/util/xxhash.h +5117 -373
- package/deps/rocksdb/rocksdb/util/xxph3.h +1762 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.cc +238 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.h +49 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +134 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.cc +104 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.h +47 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +3164 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_impl.h +29 -0
- package/deps/rocksdb/rocksdb/utilities/{backupable/backupable_db_test.cc → backup/backup_engine_test.cc} +1679 -485
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +6 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +14 -9
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +1 -0
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_gc_stats.h +4 -0
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +37 -27
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +8 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_iterator.h +13 -10
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +5 -0
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +44 -25
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +3 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +27 -19
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +4 -2
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load.cc +69 -0
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +489 -0
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +366 -0
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.cc +67 -4
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.h +21 -6
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +107 -7
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_options.h +43 -0
- package/deps/rocksdb/rocksdb/utilities/cassandra/format.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.cc +24 -8
- package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.h +7 -7
- package/deps/rocksdb/rocksdb/utilities/cassandra/serialize.h +5 -0
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +99 -218
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +8 -24
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +114 -1
- package/deps/rocksdb/rocksdb/utilities/compaction_filters/layered_compaction_filter_base.h +6 -2
- package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc +0 -4
- package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h +7 -6
- package/deps/rocksdb/rocksdb/utilities/compaction_filters.cc +56 -0
- package/deps/rocksdb/rocksdb/utilities/convenience/info_log_finder.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/counted_fs.cc +355 -0
- package/deps/rocksdb/rocksdb/utilities/counted_fs.h +152 -0
- package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +13 -0
- package/deps/rocksdb/rocksdb/utilities/env_timed.cc +164 -122
- package/deps/rocksdb/rocksdb/utilities/env_timed.h +97 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +75 -17
- package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +19 -3
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +539 -126
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +162 -17
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +110 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +94 -0
- package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +5 -2
- package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +104 -0
- package/deps/rocksdb/rocksdb/utilities/merge_operators/bytesxor.h +5 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/max.cc +4 -1
- package/deps/rocksdb/rocksdb/utilities/merge_operators/put.cc +11 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.cc +0 -2
- package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.h +5 -1
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.cc +29 -10
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.cc +29 -14
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +71 -18
- package/deps/rocksdb/rocksdb/utilities/merge_operators/uint64add.cc +15 -9
- package/deps/rocksdb/rocksdb/utilities/merge_operators.cc +120 -0
- package/deps/rocksdb/rocksdb/utilities/merge_operators.h +3 -23
- package/deps/rocksdb/rocksdb/utilities/object_registry.cc +267 -42
- package/deps/rocksdb/rocksdb/utilities/object_registry_test.cc +702 -76
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +26 -5
- package/deps/rocksdb/rocksdb/utilities/options/options_util.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +124 -1
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.cc +2 -3
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.h +8 -9
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +15 -13
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.h +4 -4
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_evictable.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_bench.cc +8 -9
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator_test.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +43 -35
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +20 -18
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +107 -2
- package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +23 -15
- package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.cc +316 -0
- package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.h +86 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +4 -5
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +4 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +119 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc +20 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h +20 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h +3 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +4 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc +38 -14
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h +17 -10
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +1 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +1 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +423 -34
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +82 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +72 -40
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +32 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +13 -5
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +7 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +207 -43
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +50 -7
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +28 -10
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +11 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +516 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +506 -15
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +27 -13
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +14 -14
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +3 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +14 -5
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +305 -27
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +55 -159
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +209 -2
- package/deps/rocksdb/rocksdb/utilities/wal_filter.cc +23 -0
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +157 -88
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +501 -114
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +91 -316
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +1212 -672
- package/deps/rocksdb/rocksdb.gyp +425 -446
- package/index.js +5 -87
- package/package-lock.json +23687 -0
- package/package.json +8 -9
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/darwin-x64/node.napi.node +0 -0
- package/prebuilds/{darwin-x64+arm64 → linux-x64}/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/README.md +0 -32
- package/deps/rocksdb/rocksdb/env/env_hdfs.cc +0 -648
- package/deps/rocksdb/rocksdb/hdfs/README +0 -23
- package/deps/rocksdb/rocksdb/hdfs/env_hdfs.h +0 -386
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backupable_db.h +0 -535
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_librados.h +0 -175
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/utility_db.h +0 -34
- package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator_test.cc +0 -102
- package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.h +0 -49
- package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.h +0 -44
- package/deps/rocksdb/rocksdb/options/customizable_helper.h +0 -216
- package/deps/rocksdb/rocksdb/port/README +0 -10
- package/deps/rocksdb/rocksdb/third-party/folly/folly/CPortability.h +0 -27
- package/deps/rocksdb/rocksdb/third-party/folly/folly/ConstexprMath.h +0 -45
- package/deps/rocksdb/rocksdb/third-party/folly/folly/Indestructible.h +0 -166
- package/deps/rocksdb/rocksdb/third-party/folly/folly/Optional.h +0 -570
- package/deps/rocksdb/rocksdb/third-party/folly/folly/Portability.h +0 -92
- package/deps/rocksdb/rocksdb/third-party/folly/folly/ScopeGuard.h +0 -54
- package/deps/rocksdb/rocksdb/third-party/folly/folly/Traits.h +0 -152
- package/deps/rocksdb/rocksdb/third-party/folly/folly/Unit.h +0 -59
- package/deps/rocksdb/rocksdb/third-party/folly/folly/Utility.h +0 -141
- package/deps/rocksdb/rocksdb/third-party/folly/folly/chrono/Hardware.h +0 -33
- package/deps/rocksdb/rocksdb/third-party/folly/folly/container/Array.h +0 -74
- package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex-inl.h +0 -117
- package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.cpp +0 -263
- package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.h +0 -96
- package/deps/rocksdb/rocksdb/third-party/folly/folly/functional/Invoke.h +0 -40
- package/deps/rocksdb/rocksdb/third-party/folly/folly/hash/Hash.h +0 -29
- package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Align.h +0 -144
- package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Bits.h +0 -30
- package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Launder.h +0 -51
- package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/Asm.h +0 -28
- package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysSyscall.h +0 -10
- package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysTypes.h +0 -26
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification-inl.h +0 -138
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.cpp +0 -23
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.h +0 -57
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil-inl.h +0 -260
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil.h +0 -52
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/Baton.h +0 -328
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex-inl.h +0 -1703
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.cpp +0 -16
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.h +0 -304
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutexSpecializations.h +0 -39
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.cpp +0 -26
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.h +0 -318
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.h +0 -57
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/InlineFunctionRef.h +0 -219
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable-inl.h +0 -207
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable.h +0 -164
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Sleeper.h +0 -57
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Spin.h +0 -77
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/test/DistributedMutexTest.cpp +0 -1145
- package/deps/rocksdb/rocksdb/util/build_version.h +0 -15
- package/deps/rocksdb/rocksdb/util/xxh3p.h +0 -1392
- package/deps/rocksdb/rocksdb/utilities/backupable/backupable_db.cc +0 -2354
- package/deps/rocksdb/rocksdb/utilities/env_librados.cc +0 -1497
- package/deps/rocksdb/rocksdb/utilities/env_librados_test.cc +0 -1146
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +0 -13
- package/deps/snappy/snappy-1.1.7/README.md +0 -149
- package/prebuilds/linux-x64/node.napi.glibc.node +0 -0
|
@@ -18,25 +18,35 @@
|
|
|
18
18
|
#include <stdio.h>
|
|
19
19
|
#include <stdlib.h>
|
|
20
20
|
#include <sys/types.h>
|
|
21
|
+
#ifdef __APPLE__
|
|
22
|
+
#include <mach/host_info.h>
|
|
23
|
+
#include <mach/mach_host.h>
|
|
24
|
+
#include <sys/sysctl.h>
|
|
25
|
+
#endif
|
|
26
|
+
#ifdef __FreeBSD__
|
|
27
|
+
#include <sys/sysctl.h>
|
|
28
|
+
#endif
|
|
21
29
|
#include <atomic>
|
|
22
30
|
#include <cinttypes>
|
|
23
31
|
#include <condition_variable>
|
|
24
32
|
#include <cstddef>
|
|
33
|
+
#include <iostream>
|
|
25
34
|
#include <memory>
|
|
26
35
|
#include <mutex>
|
|
36
|
+
#include <queue>
|
|
27
37
|
#include <thread>
|
|
28
38
|
#include <unordered_map>
|
|
29
39
|
|
|
30
40
|
#include "db/db_impl/db_impl.h"
|
|
31
41
|
#include "db/malloc_stats.h"
|
|
32
42
|
#include "db/version_set.h"
|
|
33
|
-
#include "hdfs/env_hdfs.h"
|
|
34
43
|
#include "monitoring/histogram.h"
|
|
35
44
|
#include "monitoring/statistics.h"
|
|
36
45
|
#include "options/cf_options.h"
|
|
37
46
|
#include "port/port.h"
|
|
38
47
|
#include "port/stack_trace.h"
|
|
39
48
|
#include "rocksdb/cache.h"
|
|
49
|
+
#include "rocksdb/convenience.h"
|
|
40
50
|
#include "rocksdb/db.h"
|
|
41
51
|
#include "rocksdb/env.h"
|
|
42
52
|
#include "rocksdb/filter_policy.h"
|
|
@@ -45,21 +55,29 @@
|
|
|
45
55
|
#include "rocksdb/perf_context.h"
|
|
46
56
|
#include "rocksdb/persistent_cache.h"
|
|
47
57
|
#include "rocksdb/rate_limiter.h"
|
|
58
|
+
#include "rocksdb/secondary_cache.h"
|
|
48
59
|
#include "rocksdb/slice.h"
|
|
49
60
|
#include "rocksdb/slice_transform.h"
|
|
50
61
|
#include "rocksdb/stats_history.h"
|
|
62
|
+
#include "rocksdb/table.h"
|
|
51
63
|
#include "rocksdb/utilities/object_registry.h"
|
|
52
64
|
#include "rocksdb/utilities/optimistic_transaction_db.h"
|
|
65
|
+
#include "rocksdb/utilities/options_type.h"
|
|
53
66
|
#include "rocksdb/utilities/options_util.h"
|
|
67
|
+
#ifndef ROCKSDB_LITE
|
|
68
|
+
#include "rocksdb/utilities/replayer.h"
|
|
69
|
+
#endif // ROCKSDB_LITE
|
|
54
70
|
#include "rocksdb/utilities/sim_cache.h"
|
|
55
71
|
#include "rocksdb/utilities/transaction.h"
|
|
56
72
|
#include "rocksdb/utilities/transaction_db.h"
|
|
57
73
|
#include "rocksdb/write_batch.h"
|
|
58
74
|
#include "test_util/testutil.h"
|
|
59
75
|
#include "test_util/transaction_test_util.h"
|
|
76
|
+
#include "tools/simulated_hybrid_file_system.h"
|
|
60
77
|
#include "util/cast_util.h"
|
|
61
78
|
#include "util/compression.h"
|
|
62
79
|
#include "util/crc32c.h"
|
|
80
|
+
#include "util/file_checksum_helper.h"
|
|
63
81
|
#include "util/gflags_compat.h"
|
|
64
82
|
#include "util/mutexlock.h"
|
|
65
83
|
#include "util/random.h"
|
|
@@ -67,6 +85,7 @@
|
|
|
67
85
|
#include "util/string_util.h"
|
|
68
86
|
#include "util/xxhash.h"
|
|
69
87
|
#include "utilities/blob_db/blob_db.h"
|
|
88
|
+
#include "utilities/counted_fs.h"
|
|
70
89
|
#include "utilities/merge_operators.h"
|
|
71
90
|
#include "utilities/merge_operators/bytesxor.h"
|
|
72
91
|
#include "utilities/merge_operators/sortlist.h"
|
|
@@ -84,6 +103,12 @@ using GFLAGS_NAMESPACE::ParseCommandLineFlags;
|
|
|
84
103
|
using GFLAGS_NAMESPACE::RegisterFlagValidator;
|
|
85
104
|
using GFLAGS_NAMESPACE::SetUsageMessage;
|
|
86
105
|
|
|
106
|
+
#ifdef ROCKSDB_LITE
|
|
107
|
+
#define IF_ROCKSDB_LITE(Then, Else) Then
|
|
108
|
+
#else
|
|
109
|
+
#define IF_ROCKSDB_LITE(Then, Else) Else
|
|
110
|
+
#endif
|
|
111
|
+
|
|
87
112
|
DEFINE_string(
|
|
88
113
|
benchmarks,
|
|
89
114
|
"fillseq,"
|
|
@@ -102,6 +127,12 @@ DEFINE_string(
|
|
|
102
127
|
"readreverse,"
|
|
103
128
|
"compact,"
|
|
104
129
|
"compactall,"
|
|
130
|
+
"flush,"
|
|
131
|
+
IF_ROCKSDB_LITE("",
|
|
132
|
+
"compact0,"
|
|
133
|
+
"compact1,"
|
|
134
|
+
"waitforcompaction,"
|
|
135
|
+
)
|
|
105
136
|
"multireadrandom,"
|
|
106
137
|
"mixgraph,"
|
|
107
138
|
"readseq,"
|
|
@@ -119,6 +150,8 @@ DEFINE_string(
|
|
|
119
150
|
"fill100K,"
|
|
120
151
|
"crc32c,"
|
|
121
152
|
"xxhash,"
|
|
153
|
+
"xxhash64,"
|
|
154
|
+
"xxh3,"
|
|
122
155
|
"compress,"
|
|
123
156
|
"uncompress,"
|
|
124
157
|
"acquireload,"
|
|
@@ -127,6 +160,7 @@ DEFINE_string(
|
|
|
127
160
|
"randomreplacekeys,"
|
|
128
161
|
"timeseries,"
|
|
129
162
|
"getmergeoperands",
|
|
163
|
+
"readrandomoperands,"
|
|
130
164
|
|
|
131
165
|
"Comma-separated list of operations to run in the specified"
|
|
132
166
|
" order. Available benchmarks:\n"
|
|
@@ -177,8 +211,10 @@ DEFINE_string(
|
|
|
177
211
|
"overwrite\n"
|
|
178
212
|
"\tseekrandomwhilemerging -- seekrandom and 1 thread doing "
|
|
179
213
|
"merge\n"
|
|
180
|
-
"\tcrc32c -- repeated crc32c of
|
|
181
|
-
"\txxhash -- repeated xxHash of
|
|
214
|
+
"\tcrc32c -- repeated crc32c of <block size> data\n"
|
|
215
|
+
"\txxhash -- repeated xxHash of <block size> data\n"
|
|
216
|
+
"\txxhash64 -- repeated xxHash64 of <block size> data\n"
|
|
217
|
+
"\txxh3 -- repeated XXH3 of <block size> data\n"
|
|
182
218
|
"\tacquireload -- load N*1000 times\n"
|
|
183
219
|
"\tfillseekseq -- write N values in sequential key, then read "
|
|
184
220
|
"them by seeking to each key\n"
|
|
@@ -191,18 +227,30 @@ DEFINE_string(
|
|
|
191
227
|
"Meta operations:\n"
|
|
192
228
|
"\tcompact -- Compact the entire DB; If multiple, randomly choose one\n"
|
|
193
229
|
"\tcompactall -- Compact the entire DB\n"
|
|
230
|
+
IF_ROCKSDB_LITE("",
|
|
231
|
+
"\tcompact0 -- compact L0 into L1\n"
|
|
232
|
+
"\tcompact1 -- compact L1 into L2\n"
|
|
233
|
+
"\twaitforcompaction - pause until compaction is (probably) done\n"
|
|
234
|
+
)
|
|
235
|
+
"\tflush - flush the memtable\n"
|
|
194
236
|
"\tstats -- Print DB stats\n"
|
|
195
237
|
"\tresetstats -- Reset DB stats\n"
|
|
196
238
|
"\tlevelstats -- Print the number of files and bytes per level\n"
|
|
239
|
+
"\tmemstats -- Print memtable stats\n"
|
|
197
240
|
"\tsstables -- Print sstable info\n"
|
|
198
241
|
"\theapprofile -- Dump a heap profile (if supported by this port)\n"
|
|
242
|
+
IF_ROCKSDB_LITE("",
|
|
199
243
|
"\treplay -- replay the trace file specified with trace_file\n"
|
|
244
|
+
)
|
|
200
245
|
"\tgetmergeoperands -- Insert lots of merge records which are a list of "
|
|
201
246
|
"sorted ints for a key and then compare performance of lookup for another "
|
|
202
|
-
"key "
|
|
203
|
-
"
|
|
204
|
-
"
|
|
205
|
-
"
|
|
247
|
+
"key by doing a Get followed by binary searching in the large sorted list "
|
|
248
|
+
"vs doing a GetMergeOperands and binary searching in the operands which "
|
|
249
|
+
"are sorted sub-lists. The MergeOperator used is sortlist.h\n"
|
|
250
|
+
"\treadrandomoperands -- read random keys using `GetMergeOperands()`. An "
|
|
251
|
+
"operation includes a rare but possible retry in case it got "
|
|
252
|
+
"`Status::Incomplete()`. This happens upon encountering more keys than "
|
|
253
|
+
"have ever been seen by the thread (or eight initially)\n");
|
|
206
254
|
|
|
207
255
|
DEFINE_int64(num, 1000000, "Number of key/values to place in database");
|
|
208
256
|
|
|
@@ -241,8 +289,10 @@ DEFINE_int64(deletes, -1, "Number of delete operations to do. "
|
|
|
241
289
|
|
|
242
290
|
DEFINE_int32(bloom_locality, 0, "Control bloom filter probes locality");
|
|
243
291
|
|
|
244
|
-
DEFINE_int64(seed, 0,
|
|
245
|
-
"
|
|
292
|
+
DEFINE_int64(seed, 0,
|
|
293
|
+
"Seed base for random number generators. "
|
|
294
|
+
"When 0 it is derived from the current time.");
|
|
295
|
+
static int64_t seed_base;
|
|
246
296
|
|
|
247
297
|
DEFINE_int32(threads, 1, "Number of concurrent threads to run.");
|
|
248
298
|
|
|
@@ -300,6 +350,58 @@ DEFINE_int32(num_multi_db, 0,
|
|
|
300
350
|
DEFINE_double(compression_ratio, 0.5, "Arrange to generate values that shrink"
|
|
301
351
|
" to this fraction of their original size after compression");
|
|
302
352
|
|
|
353
|
+
DEFINE_double(
|
|
354
|
+
overwrite_probability, 0.0,
|
|
355
|
+
"Used in 'filluniquerandom' benchmark: for each write operation, "
|
|
356
|
+
"we give a probability to perform an overwrite instead. The key used for "
|
|
357
|
+
"the overwrite is randomly chosen from the last 'overwrite_window_size' "
|
|
358
|
+
"keys previously inserted into the DB. "
|
|
359
|
+
"Valid overwrite_probability values: [0.0, 1.0].");
|
|
360
|
+
|
|
361
|
+
DEFINE_uint32(overwrite_window_size, 1,
|
|
362
|
+
"Used in 'filluniquerandom' benchmark. For each write operation,"
|
|
363
|
+
" when the overwrite_probability flag is set by the user, the "
|
|
364
|
+
"key used to perform an overwrite is randomly chosen from the "
|
|
365
|
+
"last 'overwrite_window_size' keys previously inserted into DB. "
|
|
366
|
+
"Warning: large values can affect throughput. "
|
|
367
|
+
"Valid overwrite_window_size values: [1, kMaxUint32].");
|
|
368
|
+
|
|
369
|
+
DEFINE_uint64(
|
|
370
|
+
disposable_entries_delete_delay, 0,
|
|
371
|
+
"Minimum delay in microseconds for the series of Deletes "
|
|
372
|
+
"to be issued. When 0 the insertion of the last disposable entry is "
|
|
373
|
+
"immediately followed by the issuance of the Deletes. "
|
|
374
|
+
"(only compatible with fillanddeleteuniquerandom benchmark).");
|
|
375
|
+
|
|
376
|
+
DEFINE_uint64(disposable_entries_batch_size, 0,
|
|
377
|
+
"Number of consecutively inserted disposable KV entries "
|
|
378
|
+
"that will be deleted after 'delete_delay' microseconds. "
|
|
379
|
+
"A series of Deletes is always issued once all the "
|
|
380
|
+
"disposable KV entries it targets have been inserted "
|
|
381
|
+
"into the DB. When 0 no deletes are issued and a "
|
|
382
|
+
"regular 'filluniquerandom' benchmark occurs. "
|
|
383
|
+
"(only compatible with fillanddeleteuniquerandom benchmark)");
|
|
384
|
+
|
|
385
|
+
DEFINE_int32(disposable_entries_value_size, 64,
|
|
386
|
+
"Size of the values (in bytes) of the entries targeted by "
|
|
387
|
+
"selective deletes. "
|
|
388
|
+
"(only compatible with fillanddeleteuniquerandom benchmark)");
|
|
389
|
+
|
|
390
|
+
DEFINE_uint64(
|
|
391
|
+
persistent_entries_batch_size, 0,
|
|
392
|
+
"Number of KV entries being inserted right before the deletes "
|
|
393
|
+
"targeting the disposable KV entries are issued. These "
|
|
394
|
+
"persistent keys are not targeted by the deletes, and will always "
|
|
395
|
+
"remain valid in the DB. (only compatible with "
|
|
396
|
+
"--benchmarks='fillanddeleteuniquerandom' "
|
|
397
|
+
"and used when--disposable_entries_batch_size is > 0).");
|
|
398
|
+
|
|
399
|
+
DEFINE_int32(persistent_entries_value_size, 64,
|
|
400
|
+
"Size of the values (in bytes) of the entries not targeted by "
|
|
401
|
+
"deletes. (only compatible with "
|
|
402
|
+
"--benchmarks='fillanddeleteuniquerandom' "
|
|
403
|
+
"and used when--disposable_entries_batch_size is > 0).");
|
|
404
|
+
|
|
303
405
|
DEFINE_double(read_random_exp_range, 0.0,
|
|
304
406
|
"Read random's key will be generated using distribution of "
|
|
305
407
|
"num * exp(-r) where r is uniform number from 0 to this value. "
|
|
@@ -395,8 +497,6 @@ DEFINE_int32(max_background_compactions,
|
|
|
395
497
|
"The maximum number of concurrent background compactions"
|
|
396
498
|
" that can occur in parallel.");
|
|
397
499
|
|
|
398
|
-
DEFINE_int32(base_background_compactions, -1, "DEPRECATED");
|
|
399
|
-
|
|
400
500
|
DEFINE_uint64(subcompactions, 1,
|
|
401
501
|
"Maximum number of subcompactions to divide L0-L1 compactions "
|
|
402
502
|
"into.");
|
|
@@ -439,6 +539,9 @@ DEFINE_int32(universal_compression_size_percent, -1,
|
|
|
439
539
|
DEFINE_bool(universal_allow_trivial_move, false,
|
|
440
540
|
"Allow trivial move in universal compaction.");
|
|
441
541
|
|
|
542
|
+
DEFINE_bool(universal_incremental, false,
|
|
543
|
+
"Enable incremental compactions in universal compaction.");
|
|
544
|
+
|
|
442
545
|
DEFINE_int64(cache_size, 8 << 20, // 8MB
|
|
443
546
|
"Number of bytes to use as a cache of uncompressed data");
|
|
444
547
|
|
|
@@ -455,6 +558,38 @@ DEFINE_double(cache_high_pri_pool_ratio, 0.0,
|
|
|
455
558
|
DEFINE_bool(use_clock_cache, false,
|
|
456
559
|
"Replace default LRU block cache with clock cache.");
|
|
457
560
|
|
|
561
|
+
DEFINE_bool(use_compressed_secondary_cache, false,
|
|
562
|
+
"Use the CompressedSecondaryCache as the secondary cache.");
|
|
563
|
+
|
|
564
|
+
DEFINE_int64(compressed_secondary_cache_size, 8 << 20, // 8MB
|
|
565
|
+
"Number of bytes to use as a cache of data");
|
|
566
|
+
|
|
567
|
+
DEFINE_int32(compressed_secondary_cache_numshardbits, 6,
|
|
568
|
+
"Number of shards for the block cache"
|
|
569
|
+
" is 2 ** compressed_secondary_cache_numshardbits."
|
|
570
|
+
" Negative means use default settings."
|
|
571
|
+
" This is applied only if FLAGS_cache_size is non-negative.");
|
|
572
|
+
|
|
573
|
+
DEFINE_double(compressed_secondary_cache_high_pri_pool_ratio, 0.0,
|
|
574
|
+
"Ratio of block cache reserve for high pri blocks. "
|
|
575
|
+
"If > 0.0, we also enable "
|
|
576
|
+
"cache_index_and_filter_blocks_with_high_priority.");
|
|
577
|
+
|
|
578
|
+
DEFINE_string(compressed_secondary_cache_compression_type, "lz4",
|
|
579
|
+
"The compression algorithm to use for large "
|
|
580
|
+
"values stored in CompressedSecondaryCache.");
|
|
581
|
+
static enum ROCKSDB_NAMESPACE::CompressionType
|
|
582
|
+
FLAGS_compressed_secondary_cache_compression_type_e =
|
|
583
|
+
ROCKSDB_NAMESPACE::kLZ4Compression;
|
|
584
|
+
|
|
585
|
+
DEFINE_uint32(
|
|
586
|
+
compressed_secondary_cache_compress_format_version, 2,
|
|
587
|
+
"compress_format_version can have two values: "
|
|
588
|
+
"compress_format_version == 1 -- decompressed size is not included"
|
|
589
|
+
" in the block header."
|
|
590
|
+
"compress_format_version == 2 -- decompressed size is included"
|
|
591
|
+
" in the block header in varint32 format.");
|
|
592
|
+
|
|
458
593
|
DEFINE_int64(simcache_size, -1,
|
|
459
594
|
"Number of bytes to use as a simcache of "
|
|
460
595
|
"uncompressed data. Nagative value disables simcache.");
|
|
@@ -532,6 +667,10 @@ DEFINE_bool(block_align,
|
|
|
532
667
|
ROCKSDB_NAMESPACE::BlockBasedTableOptions().block_align,
|
|
533
668
|
"Align data blocks on page size");
|
|
534
669
|
|
|
670
|
+
DEFINE_int64(prepopulate_block_cache, 0,
|
|
671
|
+
"Pre-populate hot/warm blocks in block cache. 0 to disable and 1 "
|
|
672
|
+
"to insert during flush");
|
|
673
|
+
|
|
535
674
|
DEFINE_bool(use_data_block_hash_index, false,
|
|
536
675
|
"if use kDataBlockBinaryAndHash "
|
|
537
676
|
"instead of kDataBlockBinarySearch. "
|
|
@@ -558,9 +697,6 @@ DEFINE_int32(file_opening_threads,
|
|
|
558
697
|
"If open_files is set to -1, this option set the number of "
|
|
559
698
|
"threads that will be used to open files during DB::Open()");
|
|
560
699
|
|
|
561
|
-
DEFINE_bool(new_table_reader_for_compaction_inputs, true,
|
|
562
|
-
"If true, uses a separate file handle for compaction inputs");
|
|
563
|
-
|
|
564
700
|
DEFINE_int32(compaction_readahead_size, 0, "Compaction readahead size");
|
|
565
701
|
|
|
566
702
|
DEFINE_int32(log_readahead_size, 0, "WAL and manifest readahead size");
|
|
@@ -571,8 +707,9 @@ DEFINE_int32(random_access_max_buffer_size, 1024 * 1024,
|
|
|
571
707
|
DEFINE_int32(writable_file_max_buffer_size, 1024 * 1024,
|
|
572
708
|
"Maximum write buffer for Writable File");
|
|
573
709
|
|
|
574
|
-
DEFINE_int32(bloom_bits, -1,
|
|
575
|
-
" use default
|
|
710
|
+
DEFINE_int32(bloom_bits, -1,
|
|
711
|
+
"Bloom filter bits per key. Negative means use default."
|
|
712
|
+
"Zero disables.");
|
|
576
713
|
|
|
577
714
|
DEFINE_bool(use_ribbon_filter, false, "Use Ribbon instead of Bloom filter");
|
|
578
715
|
|
|
@@ -584,6 +721,10 @@ DEFINE_bool(memtable_whole_key_filtering, false,
|
|
|
584
721
|
DEFINE_bool(memtable_use_huge_page, false,
|
|
585
722
|
"Try to use huge page in memtables.");
|
|
586
723
|
|
|
724
|
+
DEFINE_bool(whole_key_filtering,
|
|
725
|
+
ROCKSDB_NAMESPACE::BlockBasedTableOptions().whole_key_filtering,
|
|
726
|
+
"Use whole keys (in addition to prefixes) in SST bloom filter.");
|
|
727
|
+
|
|
587
728
|
DEFINE_bool(use_existing_db, false, "If true, do not destroy the existing"
|
|
588
729
|
" database. If you set this flag and also specify a benchmark that"
|
|
589
730
|
" wants a fresh database, that benchmark will fail.");
|
|
@@ -632,6 +773,10 @@ DEFINE_bool(verify_checksum, true,
|
|
|
632
773
|
"Verify checksum for every block read"
|
|
633
774
|
" from storage");
|
|
634
775
|
|
|
776
|
+
DEFINE_int32(checksum_type,
|
|
777
|
+
ROCKSDB_NAMESPACE::BlockBasedTableOptions().checksum,
|
|
778
|
+
"ChecksumType as an int");
|
|
779
|
+
|
|
635
780
|
DEFINE_bool(statistics, false, "Database statistics");
|
|
636
781
|
DEFINE_int32(stats_level, ROCKSDB_NAMESPACE::StatsLevel::kExceptDetailedTimers,
|
|
637
782
|
"stats level for statistics");
|
|
@@ -649,6 +794,14 @@ DEFINE_bool(use_fsync, false, "If true, issue fsync instead of fdatasync");
|
|
|
649
794
|
|
|
650
795
|
DEFINE_bool(disable_wal, false, "If true, do not write WAL for write.");
|
|
651
796
|
|
|
797
|
+
DEFINE_bool(manual_wal_flush, false,
|
|
798
|
+
"If true, buffer WAL until buffer is full or a manual FlushWAL().");
|
|
799
|
+
|
|
800
|
+
DEFINE_string(wal_compression, "none",
|
|
801
|
+
"Algorithm to use for WAL compression. none to disable.");
|
|
802
|
+
static enum ROCKSDB_NAMESPACE::CompressionType FLAGS_wal_compression_e =
|
|
803
|
+
ROCKSDB_NAMESPACE::kNoCompression;
|
|
804
|
+
|
|
652
805
|
DEFINE_string(wal_dir, "", "If not empty, use the given dir for WAL");
|
|
653
806
|
|
|
654
807
|
DEFINE_string(truth_db, "/dev/shm/truth_db/dbbench",
|
|
@@ -680,24 +833,23 @@ DEFINE_string(max_bytes_for_level_multiplier_additional, "",
|
|
|
680
833
|
|
|
681
834
|
DEFINE_int32(level0_stop_writes_trigger,
|
|
682
835
|
ROCKSDB_NAMESPACE::Options().level0_stop_writes_trigger,
|
|
683
|
-
"Number of files in level-0"
|
|
684
|
-
" that will trigger put stop.");
|
|
836
|
+
"Number of files in level-0 that will trigger put stop.");
|
|
685
837
|
|
|
686
838
|
DEFINE_int32(level0_slowdown_writes_trigger,
|
|
687
839
|
ROCKSDB_NAMESPACE::Options().level0_slowdown_writes_trigger,
|
|
688
|
-
"Number of files in level-0"
|
|
689
|
-
" that will slow down writes.");
|
|
840
|
+
"Number of files in level-0 that will slow down writes.");
|
|
690
841
|
|
|
691
842
|
DEFINE_int32(level0_file_num_compaction_trigger,
|
|
692
843
|
ROCKSDB_NAMESPACE::Options().level0_file_num_compaction_trigger,
|
|
693
|
-
"Number of files in level-0"
|
|
694
|
-
" when compactions start");
|
|
844
|
+
"Number of files in level-0 when compactions start.");
|
|
695
845
|
|
|
696
846
|
DEFINE_uint64(periodic_compaction_seconds,
|
|
697
847
|
ROCKSDB_NAMESPACE::Options().periodic_compaction_seconds,
|
|
698
848
|
"Files older than this will be picked up for compaction and"
|
|
699
849
|
" rewritten to the same level");
|
|
700
850
|
|
|
851
|
+
DEFINE_uint64(ttl_seconds, ROCKSDB_NAMESPACE::Options().ttl, "Set options.ttl");
|
|
852
|
+
|
|
701
853
|
static bool ValidateInt32Percent(const char* flagname, int32_t value) {
|
|
702
854
|
if (value <= 0 || value>=100) {
|
|
703
855
|
fprintf(stderr, "Invalid value for --%s: %d, 0< pct <100 \n",
|
|
@@ -722,11 +874,25 @@ DEFINE_int32(deletepercent, 2, "Percentage of deletes out of reads/writes/"
|
|
|
722
874
|
"deletepercent), so deletepercent must be smaller than (100 - "
|
|
723
875
|
"FLAGS_readwritepercent)");
|
|
724
876
|
|
|
725
|
-
DEFINE_bool(optimize_filters_for_hits,
|
|
877
|
+
DEFINE_bool(optimize_filters_for_hits,
|
|
878
|
+
ROCKSDB_NAMESPACE::Options().optimize_filters_for_hits,
|
|
726
879
|
"Optimizes bloom filters for workloads for most lookups return "
|
|
727
880
|
"a value. For now this doesn't create bloom filters for the max "
|
|
728
881
|
"level of the LSM to reduce metadata that should fit in RAM. ");
|
|
729
882
|
|
|
883
|
+
DEFINE_bool(paranoid_checks, ROCKSDB_NAMESPACE::Options().paranoid_checks,
|
|
884
|
+
"RocksDB will aggressively check consistency of the data.");
|
|
885
|
+
|
|
886
|
+
DEFINE_bool(force_consistency_checks,
|
|
887
|
+
ROCKSDB_NAMESPACE::Options().force_consistency_checks,
|
|
888
|
+
"Runs consistency checks on the LSM every time a change is "
|
|
889
|
+
"applied.");
|
|
890
|
+
|
|
891
|
+
DEFINE_bool(check_flush_compaction_key_order,
|
|
892
|
+
ROCKSDB_NAMESPACE::Options().check_flush_compaction_key_order,
|
|
893
|
+
"During flush or compaction, check whether keys inserted to "
|
|
894
|
+
"output files are in order.");
|
|
895
|
+
|
|
730
896
|
DEFINE_uint64(delete_obsolete_files_period_micros, 0,
|
|
731
897
|
"Ignored. Left here for backward compatibility");
|
|
732
898
|
|
|
@@ -739,8 +905,7 @@ DEFINE_int64(writes_per_range_tombstone, 0,
|
|
|
739
905
|
DEFINE_int64(range_tombstone_width, 100, "Number of keys in tombstone's range");
|
|
740
906
|
|
|
741
907
|
DEFINE_int64(max_num_range_tombstones, 0,
|
|
742
|
-
"Maximum number of range tombstones "
|
|
743
|
-
"to insert.");
|
|
908
|
+
"Maximum number of range tombstones to insert.");
|
|
744
909
|
|
|
745
910
|
DEFINE_bool(expand_range_tombstones, false,
|
|
746
911
|
"Expand range tombstone into sequential regular tombstones.");
|
|
@@ -796,55 +961,104 @@ DEFINE_bool(fifo_compaction_allow_compaction, true,
|
|
|
796
961
|
|
|
797
962
|
DEFINE_uint64(fifo_compaction_ttl, 0, "TTL for the SST Files in seconds.");
|
|
798
963
|
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
964
|
+
DEFINE_uint64(fifo_age_for_warm, 0, "age_for_warm for FIFO compaction.");
|
|
965
|
+
|
|
966
|
+
// Stacked BlobDB Options
|
|
967
|
+
DEFINE_bool(use_blob_db, false, "[Stacked BlobDB] Open a BlobDB instance.");
|
|
803
968
|
|
|
804
969
|
DEFINE_bool(
|
|
805
970
|
blob_db_enable_gc,
|
|
806
971
|
ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().enable_garbage_collection,
|
|
807
|
-
"Enable BlobDB garbage collection.");
|
|
972
|
+
"[Stacked BlobDB] Enable BlobDB garbage collection.");
|
|
808
973
|
|
|
809
974
|
DEFINE_double(
|
|
810
975
|
blob_db_gc_cutoff,
|
|
811
976
|
ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().garbage_collection_cutoff,
|
|
812
|
-
"Cutoff ratio for BlobDB garbage collection.");
|
|
977
|
+
"[Stacked BlobDB] Cutoff ratio for BlobDB garbage collection.");
|
|
813
978
|
|
|
814
979
|
DEFINE_bool(blob_db_is_fifo,
|
|
815
980
|
ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().is_fifo,
|
|
816
|
-
"Enable FIFO eviction strategy in BlobDB.");
|
|
981
|
+
"[Stacked BlobDB] Enable FIFO eviction strategy in BlobDB.");
|
|
817
982
|
|
|
818
983
|
DEFINE_uint64(blob_db_max_db_size,
|
|
819
984
|
ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().max_db_size,
|
|
820
|
-
"Max size limit of the directory where blob
|
|
985
|
+
"[Stacked BlobDB] Max size limit of the directory where blob "
|
|
986
|
+
"files are stored.");
|
|
821
987
|
|
|
822
|
-
DEFINE_uint64(
|
|
823
|
-
|
|
824
|
-
|
|
988
|
+
DEFINE_uint64(blob_db_max_ttl_range, 0,
|
|
989
|
+
"[Stacked BlobDB] TTL range to generate BlobDB data (in "
|
|
990
|
+
"seconds). 0 means no TTL.");
|
|
825
991
|
|
|
826
|
-
DEFINE_uint64(
|
|
827
|
-
|
|
828
|
-
|
|
992
|
+
DEFINE_uint64(
|
|
993
|
+
blob_db_ttl_range_secs,
|
|
994
|
+
ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().ttl_range_secs,
|
|
995
|
+
"[Stacked BlobDB] TTL bucket size to use when creating blob files.");
|
|
829
996
|
|
|
830
|
-
DEFINE_uint64(
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
997
|
+
DEFINE_uint64(
|
|
998
|
+
blob_db_min_blob_size,
|
|
999
|
+
ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().min_blob_size,
|
|
1000
|
+
"[Stacked BlobDB] Smallest blob to store in a file. Blobs "
|
|
1001
|
+
"smaller than this will be inlined with the key in the LSM tree.");
|
|
834
1002
|
|
|
835
1003
|
DEFINE_uint64(blob_db_bytes_per_sync,
|
|
836
1004
|
ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().bytes_per_sync,
|
|
837
|
-
"Bytes to sync blob file at.");
|
|
1005
|
+
"[Stacked BlobDB] Bytes to sync blob file at.");
|
|
838
1006
|
|
|
839
1007
|
DEFINE_uint64(blob_db_file_size,
|
|
840
1008
|
ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().blob_file_size,
|
|
841
|
-
"Target size of each blob file.");
|
|
1009
|
+
"[Stacked BlobDB] Target size of each blob file.");
|
|
842
1010
|
|
|
843
|
-
DEFINE_string(
|
|
844
|
-
|
|
1011
|
+
DEFINE_string(
|
|
1012
|
+
blob_db_compression_type, "snappy",
|
|
1013
|
+
"[Stacked BlobDB] Algorithm to use to compress blobs in blob files.");
|
|
845
1014
|
static enum ROCKSDB_NAMESPACE::CompressionType
|
|
846
1015
|
FLAGS_blob_db_compression_type_e = ROCKSDB_NAMESPACE::kSnappyCompression;
|
|
847
1016
|
|
|
1017
|
+
#endif // ROCKSDB_LITE
|
|
1018
|
+
|
|
1019
|
+
// Integrated BlobDB options
|
|
1020
|
+
DEFINE_bool(
|
|
1021
|
+
enable_blob_files,
|
|
1022
|
+
ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions().enable_blob_files,
|
|
1023
|
+
"[Integrated BlobDB] Enable writing large values to separate blob files.");
|
|
1024
|
+
|
|
1025
|
+
DEFINE_uint64(min_blob_size,
|
|
1026
|
+
ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions().min_blob_size,
|
|
1027
|
+
"[Integrated BlobDB] The size of the smallest value to be stored "
|
|
1028
|
+
"separately in a blob file.");
|
|
1029
|
+
|
|
1030
|
+
DEFINE_uint64(blob_file_size,
|
|
1031
|
+
ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions().blob_file_size,
|
|
1032
|
+
"[Integrated BlobDB] The size limit for blob files.");
|
|
1033
|
+
|
|
1034
|
+
DEFINE_string(blob_compression_type, "none",
|
|
1035
|
+
"[Integrated BlobDB] The compression algorithm to use for large "
|
|
1036
|
+
"values stored in blob files.");
|
|
1037
|
+
|
|
1038
|
+
DEFINE_bool(enable_blob_garbage_collection,
|
|
1039
|
+
ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions()
|
|
1040
|
+
.enable_blob_garbage_collection,
|
|
1041
|
+
"[Integrated BlobDB] Enable blob garbage collection.");
|
|
1042
|
+
|
|
1043
|
+
DEFINE_double(blob_garbage_collection_age_cutoff,
|
|
1044
|
+
ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions()
|
|
1045
|
+
.blob_garbage_collection_age_cutoff,
|
|
1046
|
+
"[Integrated BlobDB] The cutoff in terms of blob file age for "
|
|
1047
|
+
"garbage collection.");
|
|
1048
|
+
|
|
1049
|
+
DEFINE_double(blob_garbage_collection_force_threshold,
|
|
1050
|
+
ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions()
|
|
1051
|
+
.blob_garbage_collection_force_threshold,
|
|
1052
|
+
"[Integrated BlobDB] The threshold for the ratio of garbage in "
|
|
1053
|
+
"the oldest blob files for forcing garbage collection.");
|
|
1054
|
+
|
|
1055
|
+
DEFINE_uint64(blob_compaction_readahead_size,
|
|
1056
|
+
ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions()
|
|
1057
|
+
.blob_compaction_readahead_size,
|
|
1058
|
+
"[Integrated BlobDB] Compaction readahead for blob files.");
|
|
1059
|
+
|
|
1060
|
+
#ifndef ROCKSDB_LITE
|
|
1061
|
+
|
|
848
1062
|
// Secondary DB instance Options
|
|
849
1063
|
DEFINE_bool(use_secondary_db, false,
|
|
850
1064
|
"Open a RocksDB secondary instance. A primary instance can be "
|
|
@@ -866,10 +1080,12 @@ DEFINE_bool(report_bg_io_stats, false,
|
|
|
866
1080
|
DEFINE_bool(use_stderr_info_logger, false,
|
|
867
1081
|
"Write info logs to stderr instead of to LOG file. ");
|
|
868
1082
|
|
|
1083
|
+
#ifndef ROCKSDB_LITE
|
|
1084
|
+
|
|
869
1085
|
DEFINE_string(trace_file, "", "Trace workload to a file. ");
|
|
870
1086
|
|
|
871
|
-
|
|
872
|
-
|
|
1087
|
+
DEFINE_double(trace_replay_fast_forward, 1.0,
|
|
1088
|
+
"Fast forward trace replay, must > 0.0.");
|
|
873
1089
|
DEFINE_int32(block_cache_trace_sampling_frequency, 1,
|
|
874
1090
|
"Block cache trace sampling frequency, termed s. It uses spatial "
|
|
875
1091
|
"downsampling and samples accesses to one out of s blocks.");
|
|
@@ -883,6 +1099,37 @@ DEFINE_string(block_cache_trace_file, "", "Block cache trace file path.");
|
|
|
883
1099
|
DEFINE_int32(trace_replay_threads, 1,
|
|
884
1100
|
"The number of threads to replay, must >=1.");
|
|
885
1101
|
|
|
1102
|
+
DEFINE_bool(io_uring_enabled, true,
|
|
1103
|
+
"If true, enable the use of IO uring if the platform supports it");
|
|
1104
|
+
extern "C" bool RocksDbIOUringEnable() { return FLAGS_io_uring_enabled; }
|
|
1105
|
+
#endif // ROCKSDB_LITE
|
|
1106
|
+
|
|
1107
|
+
DEFINE_bool(adaptive_readahead, false,
|
|
1108
|
+
"carry forward internal auto readahead size from one file to next "
|
|
1109
|
+
"file at each level during iteration");
|
|
1110
|
+
|
|
1111
|
+
DEFINE_bool(rate_limit_user_ops, false,
|
|
1112
|
+
"When true use Env::IO_USER priority level to charge internal rate "
|
|
1113
|
+
"limiter for reads associated with user operations.");
|
|
1114
|
+
|
|
1115
|
+
DEFINE_bool(file_checksum, false,
|
|
1116
|
+
"When true use FileChecksumGenCrc32cFactory for "
|
|
1117
|
+
"file_checksum_gen_factory.");
|
|
1118
|
+
|
|
1119
|
+
DEFINE_bool(rate_limit_auto_wal_flush, false,
|
|
1120
|
+
"When true use Env::IO_USER priority level to charge internal rate "
|
|
1121
|
+
"limiter for automatic WAL flush (`Options::manual_wal_flush` == "
|
|
1122
|
+
"false) after the user write operation.");
|
|
1123
|
+
|
|
1124
|
+
DEFINE_bool(async_io, false,
|
|
1125
|
+
"When set true, RocksDB does asynchronous reads for internal auto "
|
|
1126
|
+
"readahead prefetching.");
|
|
1127
|
+
|
|
1128
|
+
DEFINE_bool(reserve_table_reader_memory, false,
|
|
1129
|
+
"A dynamically updating charge to block cache, loosely based on "
|
|
1130
|
+
"the actual memory usage of table reader, will occur to account "
|
|
1131
|
+
"the memory, if block cache available.");
|
|
1132
|
+
|
|
886
1133
|
static enum ROCKSDB_NAMESPACE::CompressionType StringToCompressionType(
|
|
887
1134
|
const char* ctype) {
|
|
888
1135
|
assert(ctype);
|
|
@@ -903,9 +1150,10 @@ static enum ROCKSDB_NAMESPACE::CompressionType StringToCompressionType(
|
|
|
903
1150
|
return ROCKSDB_NAMESPACE::kXpressCompression;
|
|
904
1151
|
else if (!strcasecmp(ctype, "zstd"))
|
|
905
1152
|
return ROCKSDB_NAMESPACE::kZSTD;
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
1153
|
+
else {
|
|
1154
|
+
fprintf(stderr, "Cannot parse compression type '%s'\n", ctype);
|
|
1155
|
+
exit(1);
|
|
1156
|
+
}
|
|
909
1157
|
}
|
|
910
1158
|
|
|
911
1159
|
static std::string ColumnFamilyName(size_t i) {
|
|
@@ -948,10 +1196,14 @@ DEFINE_int32(min_level_to_compress, -1, "If non-negative, compression starts"
|
|
|
948
1196
|
DEFINE_int32(compression_parallel_threads, 1,
|
|
949
1197
|
"Number of threads for parallel compression.");
|
|
950
1198
|
|
|
1199
|
+
DEFINE_uint64(compression_max_dict_buffer_bytes,
|
|
1200
|
+
ROCKSDB_NAMESPACE::CompressionOptions().max_dict_buffer_bytes,
|
|
1201
|
+
"Maximum bytes to buffer to collect samples for dictionary.");
|
|
1202
|
+
|
|
951
1203
|
static bool ValidateTableCacheNumshardbits(const char* flagname,
|
|
952
1204
|
int32_t value) {
|
|
953
|
-
if (0 >= value || value
|
|
954
|
-
fprintf(stderr, "Invalid value for --%s: %d, must be 0 < val
|
|
1205
|
+
if (0 >= value || value >= 20) {
|
|
1206
|
+
fprintf(stderr, "Invalid value for --%s: %d, must be 0 < val < 20\n",
|
|
955
1207
|
flagname, value);
|
|
956
1208
|
return false;
|
|
957
1209
|
}
|
|
@@ -961,16 +1213,20 @@ DEFINE_int32(table_cache_numshardbits, 4, "");
|
|
|
961
1213
|
|
|
962
1214
|
#ifndef ROCKSDB_LITE
|
|
963
1215
|
DEFINE_string(env_uri, "",
|
|
964
|
-
"URI for registry Env lookup. Mutually exclusive"
|
|
965
|
-
" with --hdfs and --fs_uri");
|
|
1216
|
+
"URI for registry Env lookup. Mutually exclusive with --fs_uri");
|
|
966
1217
|
DEFINE_string(fs_uri, "",
|
|
967
1218
|
"URI for registry Filesystem lookup. Mutually exclusive"
|
|
968
|
-
" with --
|
|
1219
|
+
" with --env_uri."
|
|
969
1220
|
" Creates a default environment with the specified filesystem.");
|
|
970
1221
|
#endif // ROCKSDB_LITE
|
|
971
|
-
DEFINE_string(
|
|
972
|
-
"
|
|
973
|
-
"
|
|
1222
|
+
DEFINE_string(simulate_hybrid_fs_file, "",
|
|
1223
|
+
"File for Store Metadata for Simulate hybrid FS. Empty means "
|
|
1224
|
+
"disable the feature. Now, if it is set, "
|
|
1225
|
+
"bottommost_temperature is set to kWarm.");
|
|
1226
|
+
DEFINE_int32(simulate_hybrid_hdd_multipliers, 1,
|
|
1227
|
+
"In simulate_hybrid_fs_file or simulate_hdd mode, how many HDDs "
|
|
1228
|
+
"are simulated.");
|
|
1229
|
+
DEFINE_bool(simulate_hdd, false, "Simulate read/write latency on HDD.");
|
|
974
1230
|
|
|
975
1231
|
static std::shared_ptr<ROCKSDB_NAMESPACE::Env> env_guard;
|
|
976
1232
|
|
|
@@ -985,8 +1241,12 @@ DEFINE_int64(stats_interval_seconds, 0, "Report stats every N seconds. This "
|
|
|
985
1241
|
DEFINE_int32(stats_per_interval, 0, "Reports additional stats per interval when"
|
|
986
1242
|
" this is greater than 0.");
|
|
987
1243
|
|
|
1244
|
+
DEFINE_uint64(slow_usecs, 1000000,
|
|
1245
|
+
"A message is printed for operations that "
|
|
1246
|
+
"take at least this many microseconds.");
|
|
1247
|
+
|
|
988
1248
|
DEFINE_int64(report_interval_seconds, 0,
|
|
989
|
-
"If greater than zero, it will write simple stats in
|
|
1249
|
+
"If greater than zero, it will write simple stats in CSV format "
|
|
990
1250
|
"to --report_file every N seconds");
|
|
991
1251
|
|
|
992
1252
|
DEFINE_string(report_file, "report.csv",
|
|
@@ -1000,28 +1260,6 @@ DEFINE_int32(thread_status_per_interval, 0,
|
|
|
1000
1260
|
DEFINE_int32(perf_level, ROCKSDB_NAMESPACE::PerfLevel::kDisable,
|
|
1001
1261
|
"Level of perf collection");
|
|
1002
1262
|
|
|
1003
|
-
#ifndef ROCKSDB_LITE
|
|
1004
|
-
static ROCKSDB_NAMESPACE::Env* GetCompositeEnv(
|
|
1005
|
-
std::shared_ptr<ROCKSDB_NAMESPACE::FileSystem> fs) {
|
|
1006
|
-
static std::shared_ptr<ROCKSDB_NAMESPACE::Env> composite_env =
|
|
1007
|
-
ROCKSDB_NAMESPACE::NewCompositeEnv(fs);
|
|
1008
|
-
return composite_env.get();
|
|
1009
|
-
}
|
|
1010
|
-
#endif
|
|
1011
|
-
|
|
1012
|
-
static bool ValidateRateLimit(const char* flagname, double value) {
|
|
1013
|
-
const double EPSILON = 1e-10;
|
|
1014
|
-
if ( value < -EPSILON ) {
|
|
1015
|
-
fprintf(stderr, "Invalid value for --%s: %12.6f, must be >= 0.0\n",
|
|
1016
|
-
flagname, value);
|
|
1017
|
-
return false;
|
|
1018
|
-
}
|
|
1019
|
-
return true;
|
|
1020
|
-
}
|
|
1021
|
-
DEFINE_double(soft_rate_limit, 0.0, "DEPRECATED");
|
|
1022
|
-
|
|
1023
|
-
DEFINE_double(hard_rate_limit, 0.0, "DEPRECATED");
|
|
1024
|
-
|
|
1025
1263
|
DEFINE_uint64(soft_pending_compaction_bytes_limit, 64ull * 1024 * 1024 * 1024,
|
|
1026
1264
|
"Slowdown writes if pending compaction bytes exceed this number");
|
|
1027
1265
|
|
|
@@ -1043,6 +1281,10 @@ DEFINE_bool(
|
|
|
1043
1281
|
DEFINE_bool(allow_concurrent_memtable_write, true,
|
|
1044
1282
|
"Allow multi-writers to update mem tables in parallel.");
|
|
1045
1283
|
|
|
1284
|
+
DEFINE_double(experimental_mempurge_threshold, 0.0,
|
|
1285
|
+
"Maximum useful payload ratio estimate that triggers a mempurge "
|
|
1286
|
+
"(memtable garbage collection).");
|
|
1287
|
+
|
|
1046
1288
|
DEFINE_bool(inplace_update_support,
|
|
1047
1289
|
ROCKSDB_NAMESPACE::Options().inplace_update_support,
|
|
1048
1290
|
"Support in-place memtable update for smaller or same-size values");
|
|
@@ -1062,12 +1304,11 @@ DEFINE_uint64(write_thread_slow_yield_usec, 3,
|
|
|
1062
1304
|
"The threshold at which a slow yield is considered a signal that "
|
|
1063
1305
|
"other processes or threads want the core.");
|
|
1064
1306
|
|
|
1065
|
-
DEFINE_int32(rate_limit_delay_max_milliseconds, 1000,
|
|
1066
|
-
"When hard_rate_limit is set then this is the max time a put will"
|
|
1067
|
-
" be stalled.");
|
|
1068
|
-
|
|
1069
1307
|
DEFINE_uint64(rate_limiter_bytes_per_sec, 0, "Set options.rate_limiter value.");
|
|
1070
1308
|
|
|
1309
|
+
DEFINE_int64(rate_limiter_refill_period_us, 100 * 1000,
|
|
1310
|
+
"Set refill period on rate limiter.");
|
|
1311
|
+
|
|
1071
1312
|
DEFINE_bool(rate_limiter_auto_tuned, false,
|
|
1072
1313
|
"Enable dynamic adjustment of rate limit according to demand for "
|
|
1073
1314
|
"background I/O");
|
|
@@ -1114,30 +1355,31 @@ DEFINE_double(keyrange_dist_d, 0.0,
|
|
|
1114
1355
|
"f(x)=a*exp(b*x)+c*exp(d*x)");
|
|
1115
1356
|
DEFINE_int64(keyrange_num, 1,
|
|
1116
1357
|
"The number of key ranges that are in the same prefix "
|
|
1117
|
-
"group, each prefix range will have its key access "
|
|
1118
|
-
"distribution");
|
|
1358
|
+
"group, each prefix range will have its key access distribution");
|
|
1119
1359
|
DEFINE_double(key_dist_a, 0.0,
|
|
1120
|
-
"The parameter 'a' of key access distribution model "
|
|
1121
|
-
"f(x)=a*x^b");
|
|
1360
|
+
"The parameter 'a' of key access distribution model f(x)=a*x^b");
|
|
1122
1361
|
DEFINE_double(key_dist_b, 0.0,
|
|
1123
|
-
"The parameter 'b' of key access distribution model "
|
|
1124
|
-
"f(x)=a*x^b");
|
|
1362
|
+
"The parameter 'b' of key access distribution model f(x)=a*x^b");
|
|
1125
1363
|
DEFINE_double(value_theta, 0.0,
|
|
1126
1364
|
"The parameter 'theta' of Generized Pareto Distribution "
|
|
1127
1365
|
"f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)");
|
|
1128
|
-
|
|
1366
|
+
// Use reasonable defaults based on the mixgraph paper
|
|
1367
|
+
DEFINE_double(value_k, 0.2615,
|
|
1129
1368
|
"The parameter 'k' of Generized Pareto Distribution "
|
|
1130
1369
|
"f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)");
|
|
1131
|
-
|
|
1370
|
+
// Use reasonable defaults based on the mixgraph paper
|
|
1371
|
+
DEFINE_double(value_sigma, 25.45,
|
|
1132
1372
|
"The parameter 'theta' of Generized Pareto Distribution "
|
|
1133
1373
|
"f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)");
|
|
1134
1374
|
DEFINE_double(iter_theta, 0.0,
|
|
1135
1375
|
"The parameter 'theta' of Generized Pareto Distribution "
|
|
1136
1376
|
"f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)");
|
|
1137
|
-
|
|
1377
|
+
// Use reasonable defaults based on the mixgraph paper
|
|
1378
|
+
DEFINE_double(iter_k, 2.517,
|
|
1138
1379
|
"The parameter 'k' of Generized Pareto Distribution "
|
|
1139
1380
|
"f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)");
|
|
1140
|
-
|
|
1381
|
+
// Use reasonable defaults based on the mixgraph paper
|
|
1382
|
+
DEFINE_double(iter_sigma, 14.236,
|
|
1141
1383
|
"The parameter 'sigma' of Generized Pareto Distribution "
|
|
1142
1384
|
"f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)");
|
|
1143
1385
|
DEFINE_double(mix_get_ratio, 1.0,
|
|
@@ -1147,8 +1389,6 @@ DEFINE_double(mix_put_ratio, 0.0,
|
|
|
1147
1389
|
DEFINE_double(mix_seek_ratio, 0.0,
|
|
1148
1390
|
"The ratio of Seek queries of mix_graph workload");
|
|
1149
1391
|
DEFINE_int64(mix_max_scan_len, 10000, "The max scan length of Iterator");
|
|
1150
|
-
DEFINE_int64(mix_ave_kv_size, 512,
|
|
1151
|
-
"The average key-value size of this workload");
|
|
1152
1392
|
DEFINE_int64(mix_max_value_size, 1024, "The max value size of this workload");
|
|
1153
1393
|
DEFINE_double(
|
|
1154
1394
|
sine_mix_rate_noise, 0.0,
|
|
@@ -1296,34 +1536,13 @@ DEFINE_bool(persist_stats_to_disk,
|
|
|
1296
1536
|
DEFINE_uint64(stats_history_buffer_size,
|
|
1297
1537
|
ROCKSDB_NAMESPACE::Options().stats_history_buffer_size,
|
|
1298
1538
|
"Max number of stats snapshots to keep in memory");
|
|
1539
|
+
DEFINE_bool(avoid_flush_during_recovery,
|
|
1540
|
+
ROCKSDB_NAMESPACE::Options().avoid_flush_during_recovery,
|
|
1541
|
+
"If true, avoids flushing the recovered WAL data where possible.");
|
|
1299
1542
|
DEFINE_int64(multiread_stride, 0,
|
|
1300
1543
|
"Stride length for the keys in a MultiGet batch");
|
|
1301
1544
|
DEFINE_bool(multiread_batched, false, "Use the new MultiGet API");
|
|
1302
1545
|
|
|
1303
|
-
enum RepFactory {
|
|
1304
|
-
kSkipList,
|
|
1305
|
-
kPrefixHash,
|
|
1306
|
-
kVectorRep,
|
|
1307
|
-
kHashLinkedList,
|
|
1308
|
-
};
|
|
1309
|
-
|
|
1310
|
-
static enum RepFactory StringToRepFactory(const char* ctype) {
|
|
1311
|
-
assert(ctype);
|
|
1312
|
-
|
|
1313
|
-
if (!strcasecmp(ctype, "skip_list"))
|
|
1314
|
-
return kSkipList;
|
|
1315
|
-
else if (!strcasecmp(ctype, "prefix_hash"))
|
|
1316
|
-
return kPrefixHash;
|
|
1317
|
-
else if (!strcasecmp(ctype, "vector"))
|
|
1318
|
-
return kVectorRep;
|
|
1319
|
-
else if (!strcasecmp(ctype, "hash_linkedlist"))
|
|
1320
|
-
return kHashLinkedList;
|
|
1321
|
-
|
|
1322
|
-
fprintf(stdout, "Cannot parse memreptable %s\n", ctype);
|
|
1323
|
-
return kSkipList;
|
|
1324
|
-
}
|
|
1325
|
-
|
|
1326
|
-
static enum RepFactory FLAGS_rep_factory;
|
|
1327
1546
|
DEFINE_string(memtablerep, "skip_list", "");
|
|
1328
1547
|
DEFINE_int64(hash_bucket_count, 1024 * 1024, "hash bucket count");
|
|
1329
1548
|
DEFINE_bool(use_plain_table, false, "if use plain table "
|
|
@@ -1345,17 +1564,18 @@ DEFINE_int32(skip_list_lookahead, 0, "Used with skip_list memtablerep; try "
|
|
|
1345
1564
|
"position");
|
|
1346
1565
|
DEFINE_bool(report_file_operations, false, "if report number of file "
|
|
1347
1566
|
"operations");
|
|
1567
|
+
DEFINE_bool(report_open_timing, false, "if report open timing");
|
|
1348
1568
|
DEFINE_int32(readahead_size, 0, "Iterator readahead size");
|
|
1349
1569
|
|
|
1350
1570
|
DEFINE_bool(read_with_latest_user_timestamp, true,
|
|
1351
1571
|
"If true, always use the current latest timestamp for read. If "
|
|
1352
1572
|
"false, choose a random timestamp from the past.");
|
|
1353
1573
|
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
static
|
|
1358
|
-
|
|
1574
|
+
#ifndef ROCKSDB_LITE
|
|
1575
|
+
DEFINE_string(secondary_cache_uri, "",
|
|
1576
|
+
"Full URI for creating a custom secondary cache object");
|
|
1577
|
+
static class std::shared_ptr<ROCKSDB_NAMESPACE::SecondaryCache> secondary_cache;
|
|
1578
|
+
#endif // ROCKSDB_LITE
|
|
1359
1579
|
|
|
1360
1580
|
static const bool FLAGS_prefix_size_dummy __attribute__((__unused__)) =
|
|
1361
1581
|
RegisterFlagValidator(&FLAGS_prefix_size, &ValidatePrefixSize);
|
|
@@ -1380,131 +1600,32 @@ static const bool FLAGS_table_cache_numshardbits_dummy __attribute__((__unused__
|
|
|
1380
1600
|
&ValidateTableCacheNumshardbits);
|
|
1381
1601
|
|
|
1382
1602
|
namespace ROCKSDB_NAMESPACE {
|
|
1383
|
-
|
|
1384
1603
|
namespace {
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
}
|
|
1405
|
-
|
|
1406
|
-
Status NewSequentialFile(const std::string& f,
|
|
1407
|
-
std::unique_ptr<SequentialFile>* r,
|
|
1408
|
-
const EnvOptions& soptions) override {
|
|
1409
|
-
class CountingFile : public SequentialFile {
|
|
1410
|
-
private:
|
|
1411
|
-
std::unique_ptr<SequentialFile> target_;
|
|
1412
|
-
ReportFileOpCounters* counters_;
|
|
1413
|
-
|
|
1414
|
-
public:
|
|
1415
|
-
CountingFile(std::unique_ptr<SequentialFile>&& target,
|
|
1416
|
-
ReportFileOpCounters* counters)
|
|
1417
|
-
: target_(std::move(target)), counters_(counters) {}
|
|
1418
|
-
|
|
1419
|
-
Status Read(size_t n, Slice* result, char* scratch) override {
|
|
1420
|
-
counters_->read_counter_.fetch_add(1, std::memory_order_relaxed);
|
|
1421
|
-
Status rv = target_->Read(n, result, scratch);
|
|
1422
|
-
counters_->bytes_read_.fetch_add(result->size(),
|
|
1423
|
-
std::memory_order_relaxed);
|
|
1424
|
-
return rv;
|
|
1425
|
-
}
|
|
1426
|
-
|
|
1427
|
-
Status Skip(uint64_t n) override { return target_->Skip(n); }
|
|
1428
|
-
};
|
|
1429
|
-
|
|
1430
|
-
Status s = target()->NewSequentialFile(f, r, soptions);
|
|
1431
|
-
if (s.ok()) {
|
|
1432
|
-
counters()->open_counter_.fetch_add(1, std::memory_order_relaxed);
|
|
1433
|
-
r->reset(new CountingFile(std::move(*r), counters()));
|
|
1434
|
-
}
|
|
1435
|
-
return s;
|
|
1436
|
-
}
|
|
1437
|
-
|
|
1438
|
-
Status NewRandomAccessFile(const std::string& f,
|
|
1439
|
-
std::unique_ptr<RandomAccessFile>* r,
|
|
1440
|
-
const EnvOptions& soptions) override {
|
|
1441
|
-
class CountingFile : public RandomAccessFile {
|
|
1442
|
-
private:
|
|
1443
|
-
std::unique_ptr<RandomAccessFile> target_;
|
|
1444
|
-
ReportFileOpCounters* counters_;
|
|
1445
|
-
|
|
1446
|
-
public:
|
|
1447
|
-
CountingFile(std::unique_ptr<RandomAccessFile>&& target,
|
|
1448
|
-
ReportFileOpCounters* counters)
|
|
1449
|
-
: target_(std::move(target)), counters_(counters) {}
|
|
1450
|
-
Status Read(uint64_t offset, size_t n, Slice* result,
|
|
1451
|
-
char* scratch) const override {
|
|
1452
|
-
counters_->read_counter_.fetch_add(1, std::memory_order_relaxed);
|
|
1453
|
-
Status rv = target_->Read(offset, n, result, scratch);
|
|
1454
|
-
counters_->bytes_read_.fetch_add(result->size(),
|
|
1455
|
-
std::memory_order_relaxed);
|
|
1456
|
-
return rv;
|
|
1457
|
-
}
|
|
1458
|
-
};
|
|
1459
|
-
|
|
1460
|
-
Status s = target()->NewRandomAccessFile(f, r, soptions);
|
|
1461
|
-
if (s.ok()) {
|
|
1462
|
-
counters()->open_counter_.fetch_add(1, std::memory_order_relaxed);
|
|
1463
|
-
r->reset(new CountingFile(std::move(*r), counters()));
|
|
1464
|
-
}
|
|
1465
|
-
return s;
|
|
1466
|
-
}
|
|
1467
|
-
|
|
1468
|
-
Status NewWritableFile(const std::string& f, std::unique_ptr<WritableFile>* r,
|
|
1469
|
-
const EnvOptions& soptions) override {
|
|
1470
|
-
class CountingFile : public WritableFile {
|
|
1471
|
-
private:
|
|
1472
|
-
std::unique_ptr<WritableFile> target_;
|
|
1473
|
-
ReportFileOpCounters* counters_;
|
|
1474
|
-
|
|
1475
|
-
public:
|
|
1476
|
-
CountingFile(std::unique_ptr<WritableFile>&& target,
|
|
1477
|
-
ReportFileOpCounters* counters)
|
|
1478
|
-
: target_(std::move(target)), counters_(counters) {}
|
|
1479
|
-
|
|
1480
|
-
Status Append(const Slice& data) override {
|
|
1481
|
-
counters_->append_counter_.fetch_add(1, std::memory_order_relaxed);
|
|
1482
|
-
Status rv = target_->Append(data);
|
|
1483
|
-
counters_->bytes_written_.fetch_add(data.size(),
|
|
1484
|
-
std::memory_order_relaxed);
|
|
1485
|
-
return rv;
|
|
1486
|
-
}
|
|
1487
|
-
|
|
1488
|
-
Status Truncate(uint64_t size) override { return target_->Truncate(size); }
|
|
1489
|
-
Status Close() override { return target_->Close(); }
|
|
1490
|
-
Status Flush() override { return target_->Flush(); }
|
|
1491
|
-
Status Sync() override { return target_->Sync(); }
|
|
1492
|
-
};
|
|
1493
|
-
|
|
1494
|
-
Status s = target()->NewWritableFile(f, r, soptions);
|
|
1604
|
+
static Status CreateMemTableRepFactory(
|
|
1605
|
+
const ConfigOptions& config_options,
|
|
1606
|
+
std::shared_ptr<MemTableRepFactory>* factory) {
|
|
1607
|
+
Status s;
|
|
1608
|
+
if (!strcasecmp(FLAGS_memtablerep.c_str(), SkipListFactory::kNickName())) {
|
|
1609
|
+
factory->reset(new SkipListFactory(FLAGS_skip_list_lookahead));
|
|
1610
|
+
#ifndef ROCKSDB_LITE
|
|
1611
|
+
} else if (!strcasecmp(FLAGS_memtablerep.c_str(), "prefix_hash")) {
|
|
1612
|
+
factory->reset(NewHashSkipListRepFactory(FLAGS_hash_bucket_count));
|
|
1613
|
+
} else if (!strcasecmp(FLAGS_memtablerep.c_str(),
|
|
1614
|
+
VectorRepFactory::kNickName())) {
|
|
1615
|
+
factory->reset(new VectorRepFactory());
|
|
1616
|
+
} else if (!strcasecmp(FLAGS_memtablerep.c_str(), "hash_linkedlist")) {
|
|
1617
|
+
factory->reset(NewHashLinkListRepFactory(FLAGS_hash_bucket_count));
|
|
1618
|
+
#endif // ROCKSDB_LITE
|
|
1619
|
+
} else {
|
|
1620
|
+
std::unique_ptr<MemTableRepFactory> unique;
|
|
1621
|
+
s = MemTableRepFactory::CreateFromString(config_options, FLAGS_memtablerep,
|
|
1622
|
+
&unique);
|
|
1495
1623
|
if (s.ok()) {
|
|
1496
|
-
|
|
1497
|
-
r->reset(new CountingFile(std::move(*r), counters()));
|
|
1624
|
+
factory->reset(unique.release());
|
|
1498
1625
|
}
|
|
1499
|
-
return s;
|
|
1500
1626
|
}
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
ReportFileOpCounters* counters() { return &counters_; }
|
|
1504
|
-
|
|
1505
|
-
private:
|
|
1506
|
-
ReportFileOpCounters counters_;
|
|
1507
|
-
};
|
|
1627
|
+
return s;
|
|
1628
|
+
}
|
|
1508
1629
|
|
|
1509
1630
|
} // namespace
|
|
1510
1631
|
|
|
@@ -1527,7 +1648,7 @@ static enum DistributionType StringToDistributionType(const char* ctype) {
|
|
|
1527
1648
|
return kNormal;
|
|
1528
1649
|
|
|
1529
1650
|
fprintf(stdout, "Cannot parse distribution type '%s'\n", ctype);
|
|
1530
|
-
|
|
1651
|
+
exit(1);
|
|
1531
1652
|
}
|
|
1532
1653
|
|
|
1533
1654
|
class BaseDistribution {
|
|
@@ -1766,7 +1887,7 @@ struct DBWithColumnFamilies {
|
|
|
1766
1887
|
}
|
|
1767
1888
|
};
|
|
1768
1889
|
|
|
1769
|
-
//
|
|
1890
|
+
// A class that reports stats to CSV file.
|
|
1770
1891
|
class ReporterAgent {
|
|
1771
1892
|
public:
|
|
1772
1893
|
ReporterAgent(Env* env, const std::string& fname,
|
|
@@ -1809,7 +1930,8 @@ class ReporterAgent {
|
|
|
1809
1930
|
private:
|
|
1810
1931
|
std::string Header() const { return "secs_elapsed,interval_qps"; }
|
|
1811
1932
|
void SleepAndReport() {
|
|
1812
|
-
auto
|
|
1933
|
+
auto* clock = env_->GetSystemClock().get();
|
|
1934
|
+
auto time_started = clock->NowMicros();
|
|
1813
1935
|
while (true) {
|
|
1814
1936
|
{
|
|
1815
1937
|
std::unique_lock<std::mutex> lk(mutex_);
|
|
@@ -1824,7 +1946,7 @@ class ReporterAgent {
|
|
|
1824
1946
|
auto total_ops_done_snapshot = total_ops_done_.load();
|
|
1825
1947
|
// round the seconds elapsed
|
|
1826
1948
|
auto secs_elapsed =
|
|
1827
|
-
(
|
|
1949
|
+
(clock->NowMicros() - time_started + kMicrosInSecond / 2) /
|
|
1828
1950
|
kMicrosInSecond;
|
|
1829
1951
|
std::string report = ToString(secs_elapsed) + "," +
|
|
1830
1952
|
ToString(total_ops_done_snapshot - last_report_) +
|
|
@@ -1887,6 +2009,7 @@ static std::unordered_map<OperationType, std::string, std::hash<unsigned char>>
  class CombinedStats;
  class Stats {
  private:
+ SystemClock* clock_;
  int id_;
  uint64_t start_ = 0;
  uint64_t sine_interval_;
@@ -1906,7 +2029,7 @@ class Stats {
  friend class CombinedStats;

  public:
- Stats() { Start(-1); }
+ Stats() : clock_(FLAGS_env->GetSystemClock().get()) { Start(-1); }

  void SetReporterAgent(ReporterAgent* reporter_agent) {
  reporter_agent_ = reporter_agent;
@@ -1921,8 +2044,8 @@ class Stats {
  last_report_done_ = 0;
  bytes_ = 0;
  seconds_ = 0;
- start_ =
- sine_interval_ =
+ start_ = clock_->NowMicros();
+ sine_interval_ = clock_->NowMicros();
  finish_ = start_;
  last_report_finish_ = start_;
  message_.clear();
@@ -1949,12 +2072,12 @@ class Stats {
  if (other.start_ < start_) start_ = other.start_;
  if (other.finish_ > finish_) finish_ = other.finish_;

- // Just keep the messages from one thread
+ // Just keep the messages from one thread.
  if (message_.empty()) message_ = other.message_;
  }

  void Stop() {
- finish_ =
+ finish_ = clock_->NowMicros();
  seconds_ = (finish_ - start_) * 1e-6;
  }

@@ -1974,7 +2097,7 @@ class Stats {
  "ElapsedTime", "Stage", "State", "OperationProperties");

  int64_t current_time = 0;
-
+ clock_->GetCurrentTime(&current_time).PermitUncheckedError();
  for (auto ts : thread_list) {
  fprintf(stderr, "%18" PRIu64 " %10s %12s %20s %13s %45s %12s",
  ts.thread_id,
@@ -1995,9 +2118,7 @@ class Stats {
  }
  }

- void ResetSineInterval() {
- sine_interval_ = FLAGS_env->NowMicros();
- }
+ void ResetSineInterval() { sine_interval_ = clock_->NowMicros(); }

  uint64_t GetSineInterval() {
  return sine_interval_;
@@ -2008,8 +2129,8 @@ class Stats {
  }

  void ResetLastOpTime() {
- // Set to now to avoid latency from calls to SleepForMicroseconds
- last_op_finish_ =
+ // Set to now to avoid latency from calls to SleepForMicroseconds.
+ last_op_finish_ = clock_->NowMicros();
  }

  void FinishedOps(DBWithColumnFamilies* db_with_cfh, DB* db, int64_t num_ops,
@@ -2018,7 +2139,7 @@ class Stats {
  reporter_agent_->ReportFinishedOps(num_ops);
  }
  if (FLAGS_histogram) {
- uint64_t now =
+ uint64_t now = clock_->NowMicros();
  uint64_t micros = now - last_op_finish_;

  if (hist_.find(op_type) == hist_.end())
@@ -2028,7 +2149,7 @@ class Stats {
  }
  hist_[op_type]->Add(micros);

- if (micros
+ if (micros >= FLAGS_slow_usecs && !FLAGS_stats_interval) {
  fprintf(stderr, "long op: %" PRIu64 " micros%30s\r", micros, "");
  fflush(stderr);
  }
@@ -2047,7 +2168,7 @@ class Stats {
  else next_report_ += 100000;
  fprintf(stderr, "... finished %" PRIu64 " ops%30s\r", done_, "");
  } else {
- uint64_t now =
+ uint64_t now = clock_->NowMicros();
  int64_t usecs_since_last = now - last_report_finish_;

  // Determine whether to print status where interval is either
@@ -2055,19 +2176,17 @@ class Stats {

  if (FLAGS_stats_interval_seconds &&
  usecs_since_last < (FLAGS_stats_interval_seconds * 1000000)) {
- // Don't check again for this many operations
+ // Don't check again for this many operations.
  next_report_ += FLAGS_stats_interval;

  } else {
-
  fprintf(stderr,
- "%s ... thread %d: (%" PRIu64 ",%" PRIu64
+ "%s ... thread %d: (%" PRIu64 ",%" PRIu64
+ ") ops and "
  "(%.1f,%.1f) ops/second in (%.6f,%.6f) seconds\n",
-
- id_,
+ clock_->TimeToString(now / 1000000).c_str(), id_,
  done_ - last_report_done_, done_,
- (done_ - last_report_done_) /
- (usecs_since_last / 1000000.0),
+ (done_ - last_report_done_) / (usecs_since_last / 1000000.0),
  done_ / ((now - start_) / 1000000.0),
  (now - last_report_finish_) / 1000000.0,
  (now - start_) / 1000000.0);
@@ -2097,7 +2216,13 @@ class Stats {
  }
  } else if (db) {
  if (db->GetProperty("rocksdb.stats", &stats)) {
- fprintf(stderr, "%s
+ fprintf(stderr, "%s", stats.c_str());
+ }
+ if (db->GetProperty("rocksdb.num-running-compactions", &stats)) {
+ fprintf(stderr, "num-running-compactions: %s\n", stats.c_str());
+ }
+ if (db->GetProperty("rocksdb.num-running-flushes", &stats)) {
+ fprintf(stderr, "num-running-flushes: %s\n\n", stats.c_str());
  }
  if (FLAGS_show_table_properties) {
  for (int level = 0; level < FLAGS_num_levels; ++level) {
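Editor's note: the hunk above prints additional DB properties alongside "rocksdb.stats". Those values come from DB::GetProperty, as in this minimal sketch (the database path and options here are placeholders):

    #include <cstdio>
    #include <string>
    #include "rocksdb/db.h"

    int main() {
      ROCKSDB_NAMESPACE::DB* db = nullptr;
      ROCKSDB_NAMESPACE::Options options;
      options.create_if_missing = true;
      auto s = ROCKSDB_NAMESPACE::DB::Open(options, "/tmp/props_demo", &db);
      if (!s.ok()) return 1;
      std::string v;
      if (db->GetProperty("rocksdb.num-running-compactions", &v)) {
        std::printf("num-running-compactions: %s\n", v.c_str());
      }
      if (db->GetProperty("rocksdb.num-running-flushes", &v)) {
        std::printf("num-running-flushes: %s\n", v.c_str());
      }
      delete db;
      return 0;
    }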
@@ -2163,19 +2288,11 @@ class Stats {
  }
  }
  if (FLAGS_report_file_operations) {
-
-
-
-
-
- counters->read_counter_.load(std::memory_order_relaxed));
- fprintf(stdout, "Num Append(): %d\n",
- counters->append_counter_.load(std::memory_order_relaxed));
- fprintf(stdout, "Num bytes read: %" PRIu64 "\n",
- counters->bytes_read_.load(std::memory_order_relaxed));
- fprintf(stdout, "Num bytes written: %" PRIu64 "\n",
- counters->bytes_written_.load(std::memory_order_relaxed));
- env->reset();
+ auto* counted_fs =
+ FLAGS_env->GetFileSystem()->CheckedCast<CountedFileSystem>();
+ assert(counted_fs);
+ fprintf(stdout, "%s", counted_fs->PrintCounters().c_str());
+ counted_fs->ResetCounters();
  }
  fflush(stdout);
  }
@@ -2308,8 +2425,8 @@ struct ThreadState {
  Stats stats;
  SharedState* shared;

- explicit ThreadState(int index)
- : tid(index), rand(
+ explicit ThreadState(int index, int my_seed)
+ : tid(index), rand(seed_base + my_seed) {}
  };

  class Duration {
@@ -2354,7 +2471,6 @@ class Benchmark {
  private:
  std::shared_ptr<Cache> cache_;
  std::shared_ptr<Cache> compressed_cache_;
- std::shared_ptr<const FilterPolicy> filter_policy_;
  const SliceTransform* prefix_extractor_;
  DBWithColumnFamilies db_;
  std::vector<DBWithColumnFamilies> multi_dbs_;
@@ -2362,12 +2478,14 @@ class Benchmark {
  int key_size_;
  int user_timestamp_size_;
  int prefix_size_;
+ int total_thread_count_;
  int64_t keys_per_prefix_;
  int64_t entries_per_batch_;
  int64_t writes_before_delete_range_;
  int64_t writes_per_range_tombstone_;
  int64_t range_tombstone_width_;
  int64_t max_num_range_tombstones_;
+ ReadOptions read_options_;
  WriteOptions write_options_;
  Options open_options_;  // keep options around to properly destroy db later
  #ifndef ROCKSDB_LITE
@@ -2381,7 +2499,8 @@ class Benchmark {
  int64_t readwrites_;
  int64_t merge_keys_;
  bool report_file_operations_;
- bool use_blob_db_;
+ bool use_blob_db_;  // Stacked BlobDB
+ bool read_operands_;  // read via GetMergeOperands()
  std::vector<std::string> keys_;

  class ErrorHandlerListener : public EventListener {
@@ -2395,6 +2514,9 @@ class Benchmark {

  ~ErrorHandlerListener() override {}

+ const char* Name() const override { return kClassName(); }
+ static const char* kClassName() { return "ErrorHandlerListener"; }
+
  void OnErrorRecoveryBegin(BackgroundErrorReason /*reason*/,
  Status /*bg_error*/,
  bool* auto_recovery) override {
@@ -2454,7 +2576,7 @@ class Benchmark {
  compressed);
  }

- void PrintHeader() {
+ void PrintHeader(const Options& options) {
  PrintEnvironment();
  fprintf(stdout,
  "Keys: %d bytes each (+ %d bytes user-defined timestamp)\n",
@@ -2504,20 +2626,9 @@ class Benchmark {
  fprintf(stdout, "Compression: %s\n", compression.c_str());
  fprintf(stdout, "Compression sampling rate: %" PRId64 "\n",
  FLAGS_sample_for_compression);
-
-
-
- fprintf(stdout, "Memtablerep: prefix_hash\n");
- break;
- case kSkipList:
- fprintf(stdout, "Memtablerep: skip_list\n");
- break;
- case kVectorRep:
- fprintf(stdout, "Memtablerep: vector\n");
- break;
- case kHashLinkedList:
- fprintf(stdout, "Memtablerep: hash_linkedlist\n");
- break;
+ if (options.memtable_factory != nullptr) {
+ fprintf(stdout, "Memtablerep: %s\n",
+ options.memtable_factory->GetId().c_str());
  }
  fprintf(stdout, "Perf Level: %d\n", FLAGS_perf_level);

@@ -2576,7 +2687,7 @@ class Benchmark {
  fprintf(stderr, "RocksDB: version %d.%d\n",
  kMajorVersion, kMinorVersion);

- #if defined(__linux)
+ #if defined(__linux) || defined(__APPLE__) || defined(__FreeBSD__)
  time_t now = time(nullptr);
  char buf[52];
  // Lint complains about ctime() usage, so replace it with ctime_r(). The
@@ -2584,6 +2695,7 @@ class Benchmark {
  fprintf(stderr, "Date: %s",
  ctime_r(&now, buf));  // ctime_r() adds newline

+ #if defined(__linux)
  FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
  if (cpuinfo != nullptr) {
  char line[1000];
@@ -2608,6 +2720,45 @@ class Benchmark {
  fprintf(stderr, "CPU: %d * %s\n", num_cpus, cpu_type.c_str());
  fprintf(stderr, "CPUCache: %s\n", cache_size.c_str());
  }
+ #elif defined(__APPLE__)
+ struct host_basic_info h;
+ size_t hlen = HOST_BASIC_INFO_COUNT;
+ if (host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&h,
+ (uint32_t*)&hlen) == KERN_SUCCESS) {
+ std::string cpu_type;
+ std::string cache_size;
+ size_t hcache_size;
+ hlen = sizeof(hcache_size);
+ if (sysctlbyname("hw.cachelinesize", &hcache_size, &hlen, NULL, 0) == 0) {
+ cache_size = std::to_string(hcache_size);
+ }
+ switch (h.cpu_type) {
+ case CPU_TYPE_X86_64:
+ cpu_type = "x86_64";
+ break;
+ case CPU_TYPE_ARM64:
+ cpu_type = "arm64";
+ break;
+ default:
+ break;
+ }
+ fprintf(stderr, "CPU: %d * %s\n", h.max_cpus, cpu_type.c_str());
+ fprintf(stderr, "CPUCache: %s\n", cache_size.c_str());
+ }
+ #elif defined(__FreeBSD__)
+ int ncpus;
+ size_t len = sizeof(ncpus);
+ int mib[2] = {CTL_HW, HW_NCPU};
+ if (sysctl(mib, 2, &ncpus, &len, nullptr, 0) == 0) {
+ char cpu_type[16];
+ len = sizeof(cpu_type) - 1;
+ mib[1] = HW_MACHINE;
+ if (sysctl(mib, 2, cpu_type, &len, nullptr, 0) == 0) cpu_type[len] = 0;
+
+ fprintf(stderr, "CPU: %d * %s\n", ncpus, cpu_type);
+ // no programmatic way to get the cache line size except on PPC
+ }
+ #endif
  #endif
  }

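Editor's note: the hunk above extends PrintEnvironment() to macOS and FreeBSD via host_info()/sysctl(). For reference, a tiny macOS-only sketch of the sysctlbyname() half (key names "hw.ncpu" and "hw.machine" are standard Darwin keys, not taken from the diff):

    // macOS-only sketch.
    #include <sys/sysctl.h>
    #include <cstdio>

    int main() {
      int ncpu = 0;
      size_t len = sizeof(ncpu);
      if (sysctlbyname("hw.ncpu", &ncpu, &len, nullptr, 0) == 0) {
        std::printf("CPUs: %d\n", ncpu);
      }
      char machine[64] = {0};
      len = sizeof(machine) - 1;
      if (sysctlbyname("hw.machine", machine, &len, nullptr, 0) == 0) {
        std::printf("Arch: %s\n", machine);
      }
      return 0;
    }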
@@ -2668,22 +2819,54 @@ class Benchmark {
  }
  return cache;
  } else {
-
+ LRUCacheOptions opts(
+ static_cast<size_t>(capacity), FLAGS_cache_numshardbits,
+ false /*strict_capacity_limit*/, FLAGS_cache_high_pri_pool_ratio,
  #ifdef MEMKIND
-
-
-
- std::make_shared<MemkindKmemAllocator>());
-
+ FLAGS_use_cache_memkind_kmem_allocator
+ ? std::make_shared<MemkindKmemAllocator>()
+ : nullptr
  #else
+ nullptr
+ #endif
+ );
+ if (FLAGS_use_cache_memkind_kmem_allocator) {
+ #ifndef MEMKIND
  fprintf(stderr, "Memkind library is not linked with the binary.");
  exit(1);
  #endif
- } else {
- return NewLRUCache(
- static_cast<size_t>(capacity), FLAGS_cache_numshardbits,
- false /*strict_capacity_limit*/, FLAGS_cache_high_pri_pool_ratio);
  }
+ #ifndef ROCKSDB_LITE
+ if (!FLAGS_secondary_cache_uri.empty()) {
+ Status s = SecondaryCache::CreateFromString(
+ ConfigOptions(), FLAGS_secondary_cache_uri, &secondary_cache);
+ if (secondary_cache == nullptr) {
+ fprintf(
+ stderr,
+ "No secondary cache registered matching string: %s status=%s\n",
+ FLAGS_secondary_cache_uri.c_str(), s.ToString().c_str());
+ exit(1);
+ }
+ opts.secondary_cache = secondary_cache;
+ }
+ #endif  // ROCKSDB_LITE
+
+ if (FLAGS_use_compressed_secondary_cache) {
+ CompressedSecondaryCacheOptions secondary_cache_opts;
+ secondary_cache_opts.capacity = FLAGS_compressed_secondary_cache_size;
+ secondary_cache_opts.num_shard_bits =
+ FLAGS_compressed_secondary_cache_numshardbits;
+ secondary_cache_opts.high_pri_pool_ratio =
+ FLAGS_compressed_secondary_cache_high_pri_pool_ratio;
+ secondary_cache_opts.compression_type =
+ FLAGS_compressed_secondary_cache_compression_type_e;
+ secondary_cache_opts.compress_format_version =
+ FLAGS_compressed_secondary_cache_compress_format_version;
+ opts.secondary_cache =
+ NewCompressedSecondaryCache(secondary_cache_opts);
+ }
+
+ return NewLRUCache(opts);
  }
  }

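Editor's note: the NewCache() hunk above rebuilds the block cache from LRUCacheOptions so a secondary cache can be attached before NewLRUCache() is called. A reduced, hedged sketch of that construction (shard bits and pool ratio are illustrative values, not the flags):

    #include <memory>
    #include "rocksdb/cache.h"

    std::shared_ptr<ROCKSDB_NAMESPACE::Cache> MakeBlockCache(size_t capacity) {
      ROCKSDB_NAMESPACE::LRUCacheOptions opts(
          capacity, /*num_shard_bits=*/6,
          /*strict_capacity_limit=*/false, /*high_pri_pool_ratio=*/0.5);
      // The diff additionally sets opts.secondary_cache (plain or compressed)
      // before calling NewLRUCache(opts); that requires a 7.x-era release.
      return ROCKSDB_NAMESPACE::NewLRUCache(opts);
    }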
@@ -2691,18 +2874,12 @@ class Benchmark {
  Benchmark()
  : cache_(NewCache(FLAGS_cache_size)),
  compressed_cache_(NewCache(FLAGS_compressed_cache_size)),
- filter_policy_(
- FLAGS_use_ribbon_filter
- ? NewExperimentalRibbonFilterPolicy(FLAGS_bloom_bits)
- : FLAGS_bloom_bits >= 0
- ? NewBloomFilterPolicy(FLAGS_bloom_bits,
- FLAGS_use_block_based_filter)
- : nullptr),
  prefix_extractor_(NewFixedPrefixTransform(FLAGS_prefix_size)),
  num_(FLAGS_num),
  key_size_(FLAGS_key_size),
  user_timestamp_size_(FLAGS_user_timestamp_size),
  prefix_size_(FLAGS_prefix_size),
+ total_thread_count_(0),
  keys_per_prefix_(FLAGS_keys_per_prefix),
  entries_per_batch_(1),
  reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
@@ -2715,11 +2892,11 @@ class Benchmark {
  merge_keys_(FLAGS_merge_keys < 0 ? FLAGS_num : FLAGS_merge_keys),
  report_file_operations_(FLAGS_report_file_operations),
  #ifndef ROCKSDB_LITE
- use_blob_db_(FLAGS_use_blob_db)
+ use_blob_db_(FLAGS_use_blob_db),  // Stacked BlobDB
  #else
- use_blob_db_(false)
+ use_blob_db_(false),  // Stacked BlobDB
  #endif  // !ROCKSDB_LITE
-
+ read_operands_(false) {
  // use simcache instead of cache
  if (FLAGS_simcache_size >= 0) {
  if (FLAGS_cache_numshardbits >= 1) {
@@ -2731,13 +2908,9 @@ class Benchmark {
  }

  if (report_file_operations_) {
-
-
-
- "at the same time");
- exit(1);
- }
- FLAGS_env = new ReportFileOpEnv(FLAGS_env);
+ FLAGS_env = new CompositeEnvWrapper(
+ FLAGS_env,
+ std::make_shared<CountedFileSystem>(FLAGS_env->GetFileSystem()));
  }

  if (FLAGS_prefix_size > FLAGS_key_size) {
@@ -2760,6 +2933,7 @@ class Benchmark {
  }
  #ifndef ROCKSDB_LITE
  if (use_blob_db_) {
+ // Stacked BlobDB
  blob_db::DestroyBlobDB(FLAGS_db, options, blob_db::BlobDBOptions());
  }
  #endif  // !ROCKSDB_LITE
@@ -2782,10 +2956,19 @@ class Benchmark {
  }
  }

-
+ void DeleteDBs() {
  db_.DeleteDBs();
+ for (const DBWithColumnFamilies& dbwcf : multi_dbs_) {
+ delete dbwcf.db;
+ }
+ }
+
+ ~Benchmark() {
+ DeleteDBs();
  delete prefix_extractor_;
  if (cache_.get() != nullptr) {
+ // Clear cache reference first
+ open_options_.write_buffer_manager.reset();
  // this will leak, but we're shutting down so nobody cares
  cache_->DisownData();
  }
@@ -2914,10 +3097,7 @@ class Benchmark {
  }

  void ErrorExit() {
-
- for (size_t i = 0; i < multi_dbs_.size(); i++) {
- delete multi_dbs_[i].db;
- }
+ DeleteDBs();
  exit(1);
  }

@@ -2926,7 +3106,7 @@ class Benchmark {
  ErrorExit();
  }
  Open(&open_options_);
- PrintHeader();
+ PrintHeader(open_options_);
  std::stringstream benchmark_stream(FLAGS_benchmarks);
  std::string name;
  std::unique_ptr<ExpiredTimeFilter> filter;
@@ -2949,6 +3129,17 @@ class Benchmark {
  write_options_.sync = true;
  }
  write_options_.disableWAL = FLAGS_disable_wal;
+ write_options_.rate_limiter_priority =
+ FLAGS_rate_limit_auto_wal_flush ? Env::IO_USER : Env::IO_TOTAL;
+ read_options_ = ReadOptions(FLAGS_verify_checksum, true);
+ read_options_.total_order_seek = FLAGS_total_order_seek;
+ read_options_.prefix_same_as_start = FLAGS_prefix_same_as_start;
+ read_options_.rate_limiter_priority =
+ FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL;
+ read_options_.tailing = FLAGS_use_tailing_iterator;
+ read_options_.readahead_size = FLAGS_readahead_size;
+ read_options_.adaptive_readahead = FLAGS_adaptive_readahead;
+ read_options_.async_io = FLAGS_async_io;

  void (Benchmark::*method)(ThreadState*) = nullptr;
  void (Benchmark::*post_process_method)() = nullptr;
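Editor's note: the hunk above pre-computes one shared ReadOptions/WriteOptions pair from command-line flags. A compact sketch of the same pattern with literal values standing in for the flags; only long-standing fields are set here, since newer ones used by the diff (async_io, rate_limiter_priority, adaptive_readahead) exist only in recent releases.

    #include "rocksdb/options.h"

    ROCKSDB_NAMESPACE::ReadOptions MakeReadOptions() {
      // Constructor arguments are (verify_checksums, fill_cache).
      ROCKSDB_NAMESPACE::ReadOptions ro(/*cksum=*/true, /*cache=*/true);
      ro.total_order_seek = false;
      ro.prefix_same_as_start = false;
      ro.tailing = false;
      ro.readahead_size = 0;  // 0 lets RocksDB pick the readahead size
      return ro;
    }

    ROCKSDB_NAMESPACE::WriteOptions MakeWriteOptions() {
      ROCKSDB_NAMESPACE::WriteOptions wo;
      wo.sync = false;
      wo.disableWAL = false;
      return wo;
    }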
@@ -3019,12 +3210,13 @@ class Benchmark {
  } else if (name == "fillrandom") {
  fresh_db = true;
  method = &Benchmark::WriteRandom;
- } else if (name == "filluniquerandom"
+ } else if (name == "filluniquerandom" ||
+ name == "fillanddeleteuniquerandom") {
  fresh_db = true;
  if (num_threads > 1) {
  fprintf(stderr,
- "filluniquerandom
- ", use 1 thread");
+ "filluniquerandom and fillanddeleteuniquerandom "
+ "multithreaded not supported, use 1 thread");
  num_threads = 1;
  }
  method = &Benchmark::WriteUniqueRandom;
@@ -3136,10 +3328,24 @@ class Benchmark {
  method = &Benchmark::Compact;
  } else if (name == "compactall") {
  CompactAll();
+ #ifndef ROCKSDB_LITE
+ } else if (name == "compact0") {
+ CompactLevel(0);
+ } else if (name == "compact1") {
+ CompactLevel(1);
+ } else if (name == "waitforcompaction") {
+ WaitForCompaction();
+ #endif
+ } else if (name == "flush") {
+ Flush();
  } else if (name == "crc32c") {
  method = &Benchmark::Crc32c;
  } else if (name == "xxhash") {
  method = &Benchmark::xxHash;
+ } else if (name == "xxhash64") {
+ method = &Benchmark::xxHash64;
+ } else if (name == "xxh3") {
+ method = &Benchmark::xxh3;
  } else if (name == "acquireload") {
  method = &Benchmark::AcquireLoad;
  } else if (name == "compress") {
@@ -3171,10 +3377,19 @@ class Benchmark {
  VerifyDBFromDB(FLAGS_truth_db);
  } else if (name == "levelstats") {
  PrintStats("rocksdb.levelstats");
+ } else if (name == "memstats") {
+ std::vector<std::string> keys{"rocksdb.num-immutable-mem-table",
+ "rocksdb.cur-size-active-mem-table",
+ "rocksdb.cur-size-all-mem-tables",
+ "rocksdb.size-all-mem-tables",
+ "rocksdb.num-entries-active-mem-table",
+ "rocksdb.num-entries-imm-mem-tables"};
+ PrintStats(keys);
  } else if (name == "sstables") {
  PrintStats("rocksdb.sstables");
  } else if (name == "stats_history") {
  PrintStatsHistory();
+ #ifndef ROCKSDB_LITE
  } else if (name == "replay") {
  if (num_threads > 1) {
  fprintf(stderr, "Multi-threaded replay is not yet supported\n");
@@ -3185,8 +3400,18 @@ class Benchmark {
  ErrorExit();
  }
  method = &Benchmark::Replay;
+ #endif  // ROCKSDB_LITE
  } else if (name == "getmergeoperands") {
  method = &Benchmark::GetMergeOperands;
+ #ifndef ROCKSDB_LITE
+ } else if (name == "verifychecksum") {
+ method = &Benchmark::VerifyChecksum;
+ } else if (name == "verifyfilechecksums") {
+ method = &Benchmark::VerifyFileChecksums;
+ #endif  // ROCKSDB_LITE
+ } else if (name == "readrandomoperands") {
+ read_operands_ = true;
+ method = &Benchmark::ReadRandom;
  } else if (!name.empty()) {  // No error message for empty name
  fprintf(stderr, "unknown benchmark '%s'\n", name.c_str());
  ErrorExit();
@@ -3441,7 +3666,8 @@ class Benchmark {
  arg[i].bm = this;
  arg[i].method = method;
  arg[i].shared = &shared;
-
+ total_thread_count_++;
+ arg[i].thread = new ThreadState(i, total_thread_count_);
  arg[i].thread->stats.SetReporterAgent(reporter_agent.get());
  arg[i].thread->shared = &shared;
  FLAGS_env->StartThread(ThreadBody, &arg[i]);
@@ -3474,44 +3700,42 @@ class Benchmark {
  return merge_stats;
  }

-
-
+ template <OperationType kOpType, typename FnType, typename... Args>
+ static inline void ChecksumBenchmark(FnType fn, ThreadState* thread,
+ Args... args) {
  const int size = FLAGS_block_size;  // use --block_size option for db_bench
  std::string labels = "(" + ToString(FLAGS_block_size) + " per op)";
  const char* label = labels.c_str();

  std::string data(size, 'x');
-
- uint32_t
- while (bytes <
-
- thread->stats.FinishedOps(nullptr, nullptr, 1,
+ uint64_t bytes = 0;
+ uint32_t val = 0;
+ while (bytes < 5000U * uint64_t{1048576}) {  // ~5GB
+ val += static_cast<uint32_t>(fn(data.data(), size, args...));
+ thread->stats.FinishedOps(nullptr, nullptr, 1, kOpType);
  bytes += size;
  }
  // Print so result is not dead
- fprintf(stderr, "...
+ fprintf(stderr, "... val=0x%x\r", static_cast<unsigned int>(val));

  thread->stats.AddBytes(bytes);
  thread->stats.AddMessage(label);
  }
+
+ void Crc32c(ThreadState* thread) {
+ ChecksumBenchmark<kCrc>(crc32c::Value, thread);
+ }
+
  void xxHash(ThreadState* thread) {
-
-
- const char* label = "(4K per op)";
- std::string data(size, 'x');
- int64_t bytes = 0;
- unsigned int xxh32 = 0;
- while (bytes < 500 * 1048576) {
- xxh32 = XXH32(data.data(), size, 0);
- thread->stats.FinishedOps(nullptr, nullptr, 1, kHash);
- bytes += size;
- }
- // Print so result is not dead
- fprintf(stderr, "... xxh32=0x%x\r", static_cast<unsigned int>(xxh32));
+ ChecksumBenchmark<kHash>(XXH32, thread, /*seed*/ 0);
+ }

-
- thread
+ void xxHash64(ThreadState* thread) {
+ ChecksumBenchmark<kHash>(XXH64, thread, /*seed*/ 0);
+ }
+
+ void xxh3(ThreadState* thread) {
+ ChecksumBenchmark<kHash>(XXH3_64bits, thread);
  }

  void AcquireLoad(ThreadState* thread) {
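Editor's note: the hunk above folds the Crc32c/xxHash/xxHash64/XXH3 benchmarks into one ChecksumBenchmark template. A self-contained sketch of the same shape, using a trivial stand-in checksum instead of the real crc32c/xxhash functions so it compiles without those libraries:

    #include <cstdint>
    #include <cstdio>
    #include <string>

    // Stand-in for crc32c::Value / XXH32 / XXH3_64bits.
    uint32_t ToyChecksum(const char* data, size_t n) {
      uint32_t v = 2166136261u;  // FNV-1a style mix
      for (size_t i = 0; i < n; ++i) {
        v = (v ^ static_cast<uint8_t>(data[i])) * 16777619u;
      }
      return v;
    }

    template <typename FnType, typename... Args>
    uint64_t ChecksumBenchmark(FnType fn, size_t block_size, uint64_t total_bytes,
                               Args... args) {
      std::string data(block_size, 'x');
      uint64_t bytes = 0;
      uint32_t val = 0;  // keep the result live so the loop is not optimized away
      while (bytes < total_bytes) {
        val += static_cast<uint32_t>(fn(data.data(), block_size, args...));
        bytes += block_size;
      }
      std::fprintf(stderr, "... val=0x%x\r", val);
      return bytes;
    }

    int main() {
      // ~64 MB, a scaled-down stand-in for the ~5GB loop in the diff.
      ChecksumBenchmark(ToyChecksum, 4096, 64ull << 20);
      return 0;
    }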
@@ -3627,6 +3851,8 @@ class Benchmark {
  void InitializeOptionsFromFlags(Options* opts) {
  printf("Initializing RocksDB Options from command-line flags\n");
  Options& options = *opts;
+ ConfigOptions config_options(options);
+ config_options.ignore_unsupported_options = false;

  assert(db_.db == nullptr);

@@ -3656,11 +3882,14 @@ class Benchmark {
  options.use_direct_reads = FLAGS_use_direct_reads;
  options.use_direct_io_for_flush_and_compaction =
  FLAGS_use_direct_io_for_flush_and_compaction;
+ options.manual_wal_flush = FLAGS_manual_wal_flush;
+ options.wal_compression = FLAGS_wal_compression_e;
  #ifndef ROCKSDB_LITE
  options.ttl = FLAGS_fifo_compaction_ttl;
  options.compaction_options_fifo = CompactionOptionsFIFO(
  FLAGS_fifo_compaction_max_table_files_size_mb * 1024 * 1024,
  FLAGS_fifo_compaction_allow_compaction);
+ options.compaction_options_fifo.age_for_warm = FLAGS_fifo_age_for_warm;
  #endif  // ROCKSDB_LITE
  if (FLAGS_prefix_size != 0) {
  options.prefix_extractor.reset(
@@ -3686,8 +3915,6 @@ class Benchmark {
  }
  options.bloom_locality = FLAGS_bloom_locality;
  options.max_file_opening_threads = FLAGS_file_opening_threads;
- options.new_table_reader_for_compaction_inputs =
- FLAGS_new_table_reader_for_compaction_inputs;
  options.compaction_readahead_size = FLAGS_compaction_readahead_size;
  options.log_readahead_size = FLAGS_log_readahead_size;
  options.random_access_max_buffer_size = FLAGS_random_access_max_buffer_size;
@@ -3701,47 +3928,30 @@ class Benchmark {
  FLAGS_level_compaction_dynamic_level_bytes;
  options.max_bytes_for_level_multiplier =
  FLAGS_max_bytes_for_level_multiplier;
-
-
+ Status s =
+ CreateMemTableRepFactory(config_options, &options.memtable_factory);
+ if (!s.ok()) {
+ fprintf(stderr, "Could not create memtable factory: %s\n",
+ s.ToString().c_str());
+ exit(1);
+ } else if ((FLAGS_prefix_size == 0) &&
+ (options.memtable_factory->IsInstanceOf("prefix_hash") ||
+ options.memtable_factory->IsInstanceOf("hash_linkedlist"))) {
  fprintf(stderr, "prefix_size should be non-zero if PrefixHash or "
  "HashLinkedList memtablerep is used\n");
  exit(1);
  }
- switch (FLAGS_rep_factory) {
- case kSkipList:
- options.memtable_factory.reset(new SkipListFactory(
- FLAGS_skip_list_lookahead));
- break;
- #ifndef ROCKSDB_LITE
- case kPrefixHash:
- options.memtable_factory.reset(
- NewHashSkipListRepFactory(FLAGS_hash_bucket_count));
- break;
- case kHashLinkedList:
- options.memtable_factory.reset(NewHashLinkListRepFactory(
- FLAGS_hash_bucket_count));
- break;
- case kVectorRep:
- options.memtable_factory.reset(
- new VectorRepFactory
- );
- break;
- #else
- default:
- fprintf(stderr, "Only skip list is supported in lite mode\n");
- exit(1);
- #endif  // ROCKSDB_LITE
- }
  if (FLAGS_use_plain_table) {
  #ifndef ROCKSDB_LITE
- if (
-
- fprintf(stderr, "
+ if (!options.memtable_factory->IsInstanceOf("prefix_hash") &&
+ !options.memtable_factory->IsInstanceOf("hash_linkedlist")) {
+ fprintf(stderr, "Warning: plain table is used with %s\n",
+ options.memtable_factory->Name());
  }

  int bloom_bits_per_key = FLAGS_bloom_bits;
  if (bloom_bits_per_key < 0) {
- bloom_bits_per_key =
+ bloom_bits_per_key = PlainTableOptions().bloom_bits_per_key;
  }

  PlainTableOptions plain_table_options;
@@ -3777,6 +3987,8 @@ class Benchmark {
  #endif  // ROCKSDB_LITE
  } else {
  BlockBasedTableOptions block_based_options;
+ block_based_options.checksum =
+ static_cast<ChecksumType>(FLAGS_checksum_type);
  if (FLAGS_use_hash_search) {
  if (FLAGS_prefix_size == 0) {
  fprintf(stderr,
@@ -3843,18 +4055,35 @@ class Benchmark {
  true;
  }
  block_based_options.block_cache = cache_;
+ block_based_options.reserve_table_reader_memory =
+ FLAGS_reserve_table_reader_memory;
  block_based_options.block_cache_compressed = compressed_cache_;
  block_based_options.block_size = FLAGS_block_size;
  block_based_options.block_restart_interval = FLAGS_block_restart_interval;
  block_based_options.index_block_restart_interval =
  FLAGS_index_block_restart_interval;
- block_based_options.filter_policy = filter_policy_;
  block_based_options.format_version =
  static_cast<uint32_t>(FLAGS_format_version);
  block_based_options.read_amp_bytes_per_bit = FLAGS_read_amp_bytes_per_bit;
  block_based_options.enable_index_compression =
  FLAGS_enable_index_compression;
  block_based_options.block_align = FLAGS_block_align;
+ block_based_options.whole_key_filtering = FLAGS_whole_key_filtering;
+ BlockBasedTableOptions::PrepopulateBlockCache prepopulate_block_cache =
+ block_based_options.prepopulate_block_cache;
+ switch (FLAGS_prepopulate_block_cache) {
+ case 0:
+ prepopulate_block_cache =
+ BlockBasedTableOptions::PrepopulateBlockCache::kDisable;
+ break;
+ case 1:
+ prepopulate_block_cache =
+ BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly;
+ break;
+ default:
+ fprintf(stderr, "Unknown prepopulate block cache mode\n");
+ }
+ block_based_options.prepopulate_block_cache = prepopulate_block_cache;
  if (FLAGS_use_data_block_hash_index) {
  block_based_options.data_block_index_type =
  ROCKSDB_NAMESPACE::BlockBasedTableOptions::kDataBlockBinaryAndHash;
@@ -3923,6 +4152,9 @@ class Benchmark {
  options.level0_slowdown_writes_trigger =
  FLAGS_level0_slowdown_writes_trigger;
  options.compression = FLAGS_compression_type_e;
+ if (FLAGS_simulate_hybrid_fs_file != "") {
+ options.bottommost_temperature = Temperature::kWarm;
+ }
  options.sample_for_compression = FLAGS_sample_for_compression;
  options.WAL_ttl_seconds = FLAGS_wal_ttl_seconds;
  options.WAL_size_limit_MB = FLAGS_wal_size_limit_MB;
@@ -3939,8 +4171,6 @@ class Benchmark {
  options.compression_per_level[i] = FLAGS_compression_type_e;
  }
  }
- options.soft_rate_limit = FLAGS_soft_rate_limit;
- options.hard_rate_limit = FLAGS_hard_rate_limit;
  options.soft_pending_compaction_bytes_limit =
  FLAGS_soft_pending_compaction_bytes_limit;
  options.hard_pending_compaction_bytes_limit =
@@ -3948,6 +4178,8 @@ class Benchmark {
  options.delayed_write_rate = FLAGS_delayed_write_rate;
  options.allow_concurrent_memtable_write =
  FLAGS_allow_concurrent_memtable_write;
+ options.experimental_mempurge_threshold =
+ FLAGS_experimental_mempurge_threshold;
  options.inplace_update_support = FLAGS_inplace_update_support;
  options.inplace_update_num_locks = FLAGS_inplace_update_num_locks;
  options.enable_write_thread_adaptive_yield =
@@ -3956,14 +4188,16 @@ class Benchmark {
  options.unordered_write = FLAGS_unordered_write;
  options.write_thread_max_yield_usec = FLAGS_write_thread_max_yield_usec;
  options.write_thread_slow_yield_usec = FLAGS_write_thread_slow_yield_usec;
- options.rate_limit_delay_max_milliseconds =
- FLAGS_rate_limit_delay_max_milliseconds;
  options.table_cache_numshardbits = FLAGS_table_cache_numshardbits;
  options.max_compaction_bytes = FLAGS_max_compaction_bytes;
  options.disable_auto_compactions = FLAGS_disable_auto_compactions;
  options.optimize_filters_for_hits = FLAGS_optimize_filters_for_hits;
+ options.paranoid_checks = FLAGS_paranoid_checks;
+ options.force_consistency_checks = FLAGS_force_consistency_checks;
+ options.check_flush_compaction_key_order =
+ FLAGS_check_flush_compaction_key_order;
  options.periodic_compaction_seconds = FLAGS_periodic_compaction_seconds;
-
+ options.ttl = FLAGS_ttl_seconds;
  // fill storage options
  options.advise_random_on_open = FLAGS_advise_random_on_open;
  options.access_hint_on_compaction_start = FLAGS_compaction_fadvice_e;
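Editor's note: the block-based table hunk above adds, among other things, a configurable checksum type and a prepopulate_block_cache mode. A trimmed sketch of wiring those two fields into a table factory; kXXH3 and PrepopulateBlockCache::kFlushOnly assume a release new enough to have them, and the specific choices here are illustrative, not the benchmark's defaults.

    #include <memory>
    #include "rocksdb/options.h"
    #include "rocksdb/table.h"

    void ConfigureTable(ROCKSDB_NAMESPACE::Options* options) {
      ROCKSDB_NAMESPACE::BlockBasedTableOptions bbto;
      bbto.checksum = ROCKSDB_NAMESPACE::kXXH3;  // kCRC32c remains the default
      bbto.prepopulate_block_cache = ROCKSDB_NAMESPACE::BlockBasedTableOptions::
          PrepopulateBlockCache::kFlushOnly;  // warm the cache on flush
      options->table_factory.reset(
          ROCKSDB_NAMESPACE::NewBlockBasedTableFactory(bbto));
    }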
@@ -3972,12 +4206,14 @@ class Benchmark {
  options.wal_bytes_per_sync = FLAGS_wal_bytes_per_sync;

  // merge operator options
-
-
-
-
-
-
+ if (!FLAGS_merge_operator.empty()) {
+ s = MergeOperator::CreateFromString(config_options, FLAGS_merge_operator,
+ &options.merge_operator);
+ if (!s.ok()) {
+ fprintf(stderr, "invalid merge operator[%s]: %s\n",
+ FLAGS_merge_operator.c_str(), s.ToString().c_str());
+ exit(1);
+ }
  }
  options.max_successive_merges = FLAGS_max_successive_merges;
  options.report_bg_io_stats = FLAGS_report_bg_io_stats;
@@ -4005,6 +4241,8 @@ class Benchmark {
  }
  options.compaction_options_universal.allow_trivial_move =
  FLAGS_universal_allow_trivial_move;
+ options.compaction_options_universal.incremental =
+ FLAGS_universal_incremental;
  if (FLAGS_thread_status_per_interval > 0) {
  options.enable_thread_tracking = true;
  }
@@ -4014,9 +4252,24 @@ class Benchmark {
  fprintf(stderr, "Only 64 bits timestamps are supported.\n");
  exit(1);
  }
- options.comparator =
+ options.comparator = test::BytewiseComparatorWithU64TsWrapper();
  }

+ // Integrated BlobDB
+ options.enable_blob_files = FLAGS_enable_blob_files;
+ options.min_blob_size = FLAGS_min_blob_size;
+ options.blob_file_size = FLAGS_blob_file_size;
+ options.blob_compression_type =
+ StringToCompressionType(FLAGS_blob_compression_type.c_str());
+ options.enable_blob_garbage_collection =
+ FLAGS_enable_blob_garbage_collection;
+ options.blob_garbage_collection_age_cutoff =
+ FLAGS_blob_garbage_collection_age_cutoff;
+ options.blob_garbage_collection_force_threshold =
+ FLAGS_blob_garbage_collection_force_threshold;
+ options.blob_compaction_readahead_size =
+ FLAGS_blob_compaction_readahead_size;
+
  #ifndef ROCKSDB_LITE
  if (FLAGS_readonly && FLAGS_transaction_db) {
  fprintf(stderr, "Cannot use readonly flag with transaction_db\n");
@@ -4046,6 +4299,7 @@ class Benchmark {
  options.persist_stats_to_disk = FLAGS_persist_stats_to_disk;
  options.stats_history_buffer_size =
  static_cast<size_t>(FLAGS_stats_history_buffer_size);
+ options.avoid_flush_during_recovery = FLAGS_avoid_flush_during_recovery;

  options.compression_opts.level = FLAGS_compression_level;
  options.compression_opts.max_dict_bytes = FLAGS_compression_max_dict_bytes;
@@ -4053,6 +4307,8 @@ class Benchmark {
  FLAGS_compression_zstd_max_train_bytes;
  options.compression_opts.parallel_threads =
  FLAGS_compression_parallel_threads;
+ options.compression_opts.max_dict_buffer_bytes =
+ FLAGS_compression_max_dict_buffer_bytes;
  // If this is a block based table, set some related options
  auto table_options =
  options.table_factory->GetOptions<BlockBasedTableOptions>();
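Editor's note: the merge-operator hunk above resolves --merge_operator through MergeOperator::CreateFromString instead of a hand-rolled name check. A minimal sketch of that call, assuming the name passed ("uint64add" in this example) is one the object registry knows about:

    #include <cstdio>
    #include <string>
    #include "rocksdb/convenience.h"     // ConfigOptions
    #include "rocksdb/merge_operator.h"
    #include "rocksdb/options.h"

    bool SetMergeOperator(ROCKSDB_NAMESPACE::Options* options,
                          const std::string& name) {
      ROCKSDB_NAMESPACE::ConfigOptions config_options;
      ROCKSDB_NAMESPACE::Status s =
          ROCKSDB_NAMESPACE::MergeOperator::CreateFromString(
              config_options, name, &options->merge_operator);
      if (!s.ok()) {
        std::fprintf(stderr, "invalid merge operator[%s]: %s\n", name.c_str(),
                     s.ToString().c_str());
      }
      return s.ok();
    }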
@@ -4060,12 +4316,26 @@ class Benchmark {
  if (FLAGS_cache_size) {
  table_options->block_cache = cache_;
  }
- if (FLAGS_bloom_bits
+ if (FLAGS_bloom_bits < 0) {
+ table_options->filter_policy = BlockBasedTableOptions().filter_policy;
+ } else if (FLAGS_bloom_bits == 0) {
+ table_options->filter_policy.reset();
+ } else if (FLAGS_use_block_based_filter) {
+ // Use back-door way of enabling obsolete block-based Bloom
+ Status s = FilterPolicy::CreateFromString(
+ ConfigOptions(),
+ "rocksdb.internal.DeprecatedBlockBasedBloomFilter:" +
+ ROCKSDB_NAMESPACE::ToString(FLAGS_bloom_bits),
+ &table_options->filter_policy);
+ if (!s.ok()) {
+ fprintf(stderr, "failure creating obsolete block-based filter: %s\n",
+ s.ToString().c_str());
+ exit(1);
+ }
+ } else {
  table_options->filter_policy.reset(
- FLAGS_use_ribbon_filter
-
- : NewBloomFilterPolicy(FLAGS_bloom_bits,
- FLAGS_use_block_based_filter));
+ FLAGS_use_ribbon_filter ? NewRibbonFilterPolicy(FLAGS_bloom_bits)
+ : NewBloomFilterPolicy(FLAGS_bloom_bits));
  }
  }
  if (FLAGS_row_cache_size) {
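Editor's note: the hunk above drops the benchmark-owned filter_policy_ member and builds either a Ribbon or a Bloom policy directly on the table options. A reduced sketch of the same decision; NewRibbonFilterPolicy assumes a release that ships the non-experimental Ribbon API, and both calls come from rocksdb/filter_policy.h.

    #include <memory>
    #include "rocksdb/filter_policy.h"
    #include "rocksdb/table.h"

    void SetFilter(ROCKSDB_NAMESPACE::BlockBasedTableOptions* bbto,
                   double bits_per_key, bool use_ribbon) {
      if (bits_per_key <= 0) {
        bbto->filter_policy.reset();  // no filter
        return;
      }
      bbto->filter_policy.reset(
          use_ribbon ? ROCKSDB_NAMESPACE::NewRibbonFilterPolicy(bits_per_key)
                     : ROCKSDB_NAMESPACE::NewBloomFilterPolicy(bits_per_key));
    }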
@@ -4090,15 +4360,8 @@ class Benchmark {
  }

  if (FLAGS_rate_limiter_bytes_per_sec > 0) {
- if (FLAGS_rate_limit_bg_reads &&
- !FLAGS_new_table_reader_for_compaction_inputs) {
- fprintf(stderr,
- "rate limit compaction reads must have "
- "new_table_reader_for_compaction_inputs set\n");
- exit(1);
- }
  options.rate_limiter.reset(NewGenericRateLimiter(
- FLAGS_rate_limiter_bytes_per_sec,
+ FLAGS_rate_limiter_bytes_per_sec, FLAGS_rate_limiter_refill_period_us,
  10 /* fairness */,
  FLAGS_rate_limit_bg_reads ? RateLimiter::Mode::kReadsOnly
  : RateLimiter::Mode::kWritesOnly,
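Editor's note: the hunk above starts passing the refill period straight into NewGenericRateLimiter. A sketch with literal values in place of the flags (the numbers are arbitrary examples):

    #include <memory>
    #include "rocksdb/options.h"
    #include "rocksdb/rate_limiter.h"

    void SetRateLimiter(ROCKSDB_NAMESPACE::Options* options) {
      options->rate_limiter.reset(ROCKSDB_NAMESPACE::NewGenericRateLimiter(
          /*rate_bytes_per_sec=*/16 << 20,  // 16 MB/s
          /*refill_period_us=*/100 * 1000,  // 100 ms
          /*fairness=*/10,
          ROCKSDB_NAMESPACE::RateLimiter::Mode::kWritesOnly,
          /*auto_tuned=*/false));
    }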
@@ -4106,6 +4369,12 @@ class Benchmark {
  }

  options.listeners.emplace_back(listener_);
+
+ if (FLAGS_file_checksum) {
+ options.file_checksum_gen_factory.reset(
+ new FileChecksumGenCrc32cFactory());
+ }
+
  if (FLAGS_num_multi_db <= 1) {
  OpenDb(options, FLAGS_db, &db_);
  } else {
@@ -4130,7 +4399,7 @@ class Benchmark {
  if (FLAGS_use_existing_keys) {
  // Only work on single database
  assert(db_.db != nullptr);
- ReadOptions read_opts;
+ ReadOptions read_opts;  // before read_options_ initialized
  read_opts.total_order_seek = true;
  Iterator* iter = db_.db->NewIterator(read_opts);
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
@@ -4151,6 +4420,7 @@ class Benchmark {

  void OpenDb(Options options, const std::string& db_name,
  DBWithColumnFamilies* db) {
+ uint64_t open_start = FLAGS_report_open_timing ? FLAGS_env->NowNanos() : 0;
  Status s;
  // Open with column families if necessary.
  if (FLAGS_num_column_families > 1) {
@@ -4245,6 +4515,7 @@ class Benchmark {
  db->db = ptr;
  }
  } else if (FLAGS_use_blob_db) {
+ // Stacked BlobDB
  blob_db::BlobDBOptions blob_db_options;
  blob_db_options.enable_garbage_collection = FLAGS_blob_db_enable_gc;
  blob_db_options.garbage_collection_cutoff = FLAGS_blob_db_gc_cutoff;
@@ -4290,6 +4561,11 @@ class Benchmark {
  } else {
  s = DB::Open(options, db_name, &db->db);
  }
+ if (FLAGS_report_open_timing) {
+ std::cout << "OpenDb: "
+ << (FLAGS_env->NowNanos() - open_start) / 1000000.0
+ << " milliseconds\n";
+ }
  if (!s.ok()) {
  fprintf(stderr, "open error: %s\n", s.ToString().c_str());
  exit(1);
@@ -4336,7 +4612,7 @@ class Benchmark {
  values_[i] = i;
  }
  RandomShuffle(values_.begin(), values_.end(),
- static_cast<uint32_t>(
+ static_cast<uint32_t>(seed_base));
  }
  }

@@ -4354,6 +4630,13 @@ class Benchmark {
  return std::numeric_limits<uint64_t>::max();
  }

+ // Only available for UNIQUE_RANDOM mode.
+ uint64_t Fetch(uint64_t index) {
+ assert(mode_ == UNIQUE_RANDOM);
+ assert(index < values_.size());
+ return values_[index];
+ }
+
  private:
  Random64* rand_;
  WriteMode mode_;
@@ -4400,10 +4683,10 @@ class Benchmark {
  }

  Duration duration(test_duration, max_ops, ops_per_stage);
+ const uint64_t num_per_key_gen = num_ + max_num_range_tombstones_;
  for (size_t i = 0; i < num_key_gens; i++) {
  key_gens[i].reset(new KeyGenerator(&(thread->rand), write_mode,
-
- ops_per_stage));
+ num_per_key_gen, ops_per_stage));
  }

  if (num_ != FLAGS_num) {
@@ -4414,7 +4697,7 @@ class Benchmark {

  RandomGenerator gen;
  WriteBatch batch(/*reserved_bytes=*/0, /*max_bytes=*/0,
- user_timestamp_size_);
+ /*protection_bytes_per_key=*/0, user_timestamp_size_);
  Status s;
  int64_t bytes = 0;

@@ -4424,6 +4707,79 @@ class Benchmark {
  Slice begin_key = AllocateKey(&begin_key_guard);
  std::unique_ptr<const char[]> end_key_guard;
  Slice end_key = AllocateKey(&end_key_guard);
+ double p = 0.0;
+ uint64_t num_overwrites = 0, num_unique_keys = 0, num_selective_deletes = 0;
+ // If user set overwrite_probability flag,
+ // check if value is in [0.0,1.0].
+ if (FLAGS_overwrite_probability > 0.0) {
+ p = FLAGS_overwrite_probability > 1.0 ? 1.0 : FLAGS_overwrite_probability;
+ // If overwrite set by user, and UNIQUE_RANDOM mode on,
+ // the overwrite_window_size must be > 0.
+ if (write_mode == UNIQUE_RANDOM && FLAGS_overwrite_window_size == 0) {
+ fprintf(stderr,
+ "Overwrite_window_size must be strictly greater than 0.\n");
+ ErrorExit();
+ }
+ }
+
+ // Default_random_engine provides slightly
+ // improved throughput over mt19937.
+ std::default_random_engine overwrite_gen{
+ static_cast<unsigned int>(seed_base)};
+ std::bernoulli_distribution overwrite_decider(p);
+
+ // Inserted key window is filled with the last N
+ // keys previously inserted into the DB (with
+ // N=FLAGS_overwrite_window_size).
+ // We use a deque struct because:
+ // - random access is O(1)
+ // - insertion/removal at beginning/end is also O(1).
+ std::deque<int64_t> inserted_key_window;
+ Random64 reservoir_id_gen(seed_base);
+
+ // --- Variables used in disposable/persistent keys simulation:
+ // The following variables are used when
+ // disposable_entries_batch_size is >0. We simualte a workload
+ // where the following sequence is repeated multiple times:
+ // "A set of keys S1 is inserted ('disposable entries'), then after
+ // some delay another set of keys S2 is inserted ('persistent entries')
+ // and the first set of keys S1 is deleted. S2 artificially represents
+ // the insertion of hypothetical results from some undefined computation
+ // done on the first set of keys S1. The next sequence can start as soon
+ // as the last disposable entry in the set S1 of this sequence is
+ // inserted, if the delay is non negligible"
+ bool skip_for_loop = false, is_disposable_entry = true;
+ std::vector<uint64_t> disposable_entries_index(num_key_gens, 0);
+ std::vector<uint64_t> persistent_ent_and_del_index(num_key_gens, 0);
+ const uint64_t kNumDispAndPersEntries =
+ FLAGS_disposable_entries_batch_size +
+ FLAGS_persistent_entries_batch_size;
+ if (kNumDispAndPersEntries > 0) {
+ if ((write_mode != UNIQUE_RANDOM) || (writes_per_range_tombstone_ > 0) ||
+ (p > 0.0)) {
+ fprintf(
+ stderr,
+ "Disposable/persistent deletes are not compatible with overwrites "
+ "and DeleteRanges; and are only supported in filluniquerandom.\n");
+ ErrorExit();
+ }
+ if (FLAGS_disposable_entries_value_size < 0 ||
+ FLAGS_persistent_entries_value_size < 0) {
+ fprintf(
+ stderr,
+ "disposable_entries_value_size and persistent_entries_value_size"
+ "have to be positive.\n");
+ ErrorExit();
+ }
+ }
+ Random rnd_disposable_entry(static_cast<uint32_t>(seed_base));
+ std::string random_value;
+ // Queue that stores scheduled timestamp of disposable entries deletes,
+ // along with starting index of disposable entry keys to delete.
+ std::vector<std::queue<std::pair<uint64_t, uint64_t>>> disposable_entries_q(
+ num_key_gens);
+ // --- End of variables used in disposable/persistent keys simulation.
+
  std::vector<std::unique_ptr<const char[]>> expanded_key_guards;
  std::vector<Slice> expanded_keys;
  if (FLAGS_expand_range_tombstones) {
@@ -4440,7 +4796,10 @@ class Benchmark {

  int64_t stage = 0;
  int64_t num_written = 0;
-
+ int64_t next_seq_db_at = num_ops;
+ size_t id = 0;
+
+ while ((num_per_key_gen != 0) && !duration.Done(entries_per_batch_)) {
  if (duration.GetStage() != stage) {
  stage = duration.GetStage();
  if (db_.db != nullptr) {
@@ -4452,17 +4811,144 @@ class Benchmark {
  }
  }

-
+ if (write_mode != SEQUENTIAL) {
+ id = thread->rand.Next() % num_key_gens;
+ } else {
+ // When doing a sequential load with multiple databases, load them in
+ // order rather than all at the same time to avoid:
+ // 1) long delays between flushing memtables
+ // 2) flushing memtables for all of them at the same point in time
+ // 3) not putting the same number of keys in each database
+ if (num_written >= next_seq_db_at) {
+ next_seq_db_at += num_ops;
+ id++;
+ if (id >= num_key_gens) {
+ fprintf(stderr, "Logic error. Filled all databases\n");
+ ErrorExit();
+ }
+ }
+ }
  DBWithColumnFamilies* db_with_cfh = SelectDBWithCfh(id);
+
  batch.Clear();
  int64_t batch_bytes = 0;

  for (int64_t j = 0; j < entries_per_batch_; j++) {
- int64_t rand_num =
+ int64_t rand_num = 0;
+ if ((write_mode == UNIQUE_RANDOM) && (p > 0.0)) {
+ if ((inserted_key_window.size() > 0) &&
+ overwrite_decider(overwrite_gen)) {
+ num_overwrites++;
+ rand_num = inserted_key_window[reservoir_id_gen.Next() %
+ inserted_key_window.size()];
+ } else {
+ num_unique_keys++;
+ rand_num = key_gens[id]->Next();
+ if (inserted_key_window.size() < FLAGS_overwrite_window_size) {
+ inserted_key_window.push_back(rand_num);
+ } else {
+ inserted_key_window.pop_front();
+ inserted_key_window.push_back(rand_num);
+ }
+ }
+ } else if (kNumDispAndPersEntries > 0) {
+ // Check if queue is non-empty and if we need to insert
+ // 'persistent' KV entries (KV entries that are never deleted)
+ // and delete disposable entries previously inserted.
+ if (!disposable_entries_q[id].empty() &&
+ (disposable_entries_q[id].front().first <
+ FLAGS_env->NowMicros())) {
+ // If we need to perform a "merge op" pattern,
+ // we first write all the persistent KV entries not targeted
+ // by deletes, and then we write the disposable entries deletes.
+ if (persistent_ent_and_del_index[id] <
+ FLAGS_persistent_entries_batch_size) {
+ // Generate key to insert.
+ rand_num =
+ key_gens[id]->Fetch(disposable_entries_q[id].front().second +
+ FLAGS_disposable_entries_batch_size +
+ persistent_ent_and_del_index[id]);
+ persistent_ent_and_del_index[id]++;
+ is_disposable_entry = false;
+ skip_for_loop = false;
+ } else if (persistent_ent_and_del_index[id] <
+ kNumDispAndPersEntries) {
+ // Find key of the entry to delete.
+ rand_num =
+ key_gens[id]->Fetch(disposable_entries_q[id].front().second +
+ (persistent_ent_and_del_index[id] -
+ FLAGS_persistent_entries_batch_size));
+ persistent_ent_and_del_index[id]++;
+ GenerateKeyFromInt(rand_num, FLAGS_num, &key);
+ // For the delete operation, everything happens here and we
+ // skip the rest of the for-loop, which is designed for
+ // inserts.
+ if (FLAGS_num_column_families <= 1) {
+ batch.Delete(key);
+ } else {
+ // We use same rand_num as seed for key and column family so
+ // that we can deterministically find the cfh corresponding to a
+ // particular key while reading the key.
+ batch.Delete(db_with_cfh->GetCfh(rand_num), key);
+ }
+ // A delete only includes Key+Timestamp (no value).
+ batch_bytes += key_size_ + user_timestamp_size_;
+ bytes += key_size_ + user_timestamp_size_;
+ num_selective_deletes++;
+ // Skip rest of the for-loop (j=0, j<entries_per_batch_,j++).
+ skip_for_loop = true;
+ } else {
+ assert(false);  // should never reach this point.
+ }
+ // If disposable_entries_q needs to be updated (ie: when a selective
+ // insert+delete was successfully completed, pop the job out of the
+ // queue).
+ if (!disposable_entries_q[id].empty() &&
+ (disposable_entries_q[id].front().first <
+ FLAGS_env->NowMicros()) &&
+ persistent_ent_and_del_index[id] == kNumDispAndPersEntries) {
+ disposable_entries_q[id].pop();
+ persistent_ent_and_del_index[id] = 0;
+ }
+
+ // If we are deleting disposable entries, skip the rest of the
+ // for-loop since there is no key-value inserts at this moment in
+ // time.
+ if (skip_for_loop) {
+ continue;
+ }
+
+ }
+ // If no job is in the queue, then we keep inserting disposable KV
+ // entries that will be deleted later by a series of deletes.
+ else {
+ rand_num = key_gens[id]->Fetch(disposable_entries_index[id]);
+ disposable_entries_index[id]++;
+ is_disposable_entry = true;
+ if ((disposable_entries_index[id] %
+ FLAGS_disposable_entries_batch_size) == 0) {
+ // Skip the persistent KV entries inserts for now
+ disposable_entries_index[id] +=
+ FLAGS_persistent_entries_batch_size;
+ }
+ }
+ } else {
+ rand_num = key_gens[id]->Next();
+ }
  GenerateKeyFromInt(rand_num, FLAGS_num, &key);
- Slice val
+ Slice val;
+ if (kNumDispAndPersEntries > 0) {
+ random_value = rnd_disposable_entry.RandomString(
+ is_disposable_entry ? FLAGS_disposable_entries_value_size
+ : FLAGS_persistent_entries_value_size);
+ val = Slice(random_value);
+ num_unique_keys++;
+ } else {
+ val = gen.Generate();
+ }
  if (use_blob_db_) {
  #ifndef ROCKSDB_LITE
+ // Stacked BlobDB
  blob_db::BlobDB* blobdb =
  static_cast<blob_db::BlobDB*>(db_with_cfh->db);
  if (FLAGS_blob_db_max_ttl_range > 0) {
@@ -4484,6 +4970,23 @@ class Benchmark {
       batch_bytes += val.size() + key_size_ + user_timestamp_size_;
       bytes += val.size() + key_size_ + user_timestamp_size_;
       ++num_written;
+
+      // If all disposable entries have been inserted, then we need to
+      // add in the job queue a call for 'persistent entry insertions +
+      // disposable entry deletions'.
+      if (kNumDispAndPersEntries > 0 && is_disposable_entry &&
+          ((disposable_entries_index[id] % kNumDispAndPersEntries) == 0)) {
+        // Queue contains [timestamp, starting_idx],
+        // timestamp = current_time + delay (minimum aboslute time when to
+        // start inserting the selective deletes) starting_idx = index in the
+        // keygen of the rand_num to generate the key of the first KV entry to
+        // delete (= key of the first selective delete).
+        disposable_entries_q[id].push(std::make_pair(
+            FLAGS_env->NowMicros() +
+                FLAGS_disposable_entries_delete_delay /* timestamp */,
+            disposable_entries_index[id] - kNumDispAndPersEntries
+            /*starting idx*/));
+      }
       if (writes_per_range_tombstone_ > 0 &&
           num_written > writes_before_delete_range_ &&
           (num_written - writes_before_delete_range_) /
@@ -4500,6 +5003,7 @@ class Benchmark {
               &expanded_keys[offset]);
           if (use_blob_db_) {
 #ifndef ROCKSDB_LITE
+            // Stacked BlobDB
             s = db_with_cfh->db->Delete(write_options_,
                                         expanded_keys[offset]);
 #endif  // ROCKSDB_LITE
@@ -4516,6 +5020,7 @@ class Benchmark {
               &end_key);
           if (use_blob_db_) {
 #ifndef ROCKSDB_LITE
+            // Stacked BlobDB
             s = db_with_cfh->db->DeleteRange(
                 write_options_, db_with_cfh->db->DefaultColumnFamily(),
                 begin_key, end_key);
@@ -4540,7 +5045,8 @@ class Benchmark {
       }
       if (user_timestamp_size_ > 0) {
         Slice user_ts = mock_app_clock_->Allocate(ts_guard.get());
-        s = batch.
+        s = batch.UpdateTimestamps(
+            user_ts, [this](uint32_t) { return user_timestamp_size_; });
         if (!s.ok()) {
           fprintf(stderr, "assign timestamp to write batch: %s\n",
                   s.ToString().c_str());
@@ -4548,6 +5054,7 @@ class Benchmark {
         }
       }
       if (!use_blob_db_) {
+        // Not stacked BlobDB
         s = db_with_cfh->db->Write(write_options_, &batch);
       }
       thread->stats.FinishedOps(db_with_cfh, db_with_cfh->db,
@@ -4582,6 +5089,17 @@ class Benchmark {
        ErrorExit();
      }
    }
+    if ((write_mode == UNIQUE_RANDOM) && (p > 0.0)) {
+      fprintf(stdout,
+              "Number of unique keys inserted: %" PRIu64
+              ".\nNumber of overwrites: %" PRIu64 "\n",
+              num_unique_keys, num_overwrites);
+    } else if (kNumDispAndPersEntries > 0) {
+      fprintf(stdout,
+              "Number of unique keys inserted (disposable+persistent): %" PRIu64
+              ".\nNumber of 'disposable entry delete': %" PRIu64 "\n",
+              num_written, num_selective_deletes);
+    }
    thread->stats.AddBytes(bytes);
  }

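The @@ -4540 hunk above replaces the truncated `s = batch.` call with the newer `WriteBatch::UpdateTimestamps()` API. A minimal sketch of how that call is used outside of db_bench, assuming a column family configured with a fixed-width user-defined timestamp; `WriteWithTimestamp` and `ts_size` are illustrative names, not part of this diff:

    #include <rocksdb/db.h>
    #include <rocksdb/write_batch.h>

    rocksdb::Status WriteWithTimestamp(rocksdb::DB* db, const rocksdb::Slice& key,
                                       const rocksdb::Slice& value,
                                       const rocksdb::Slice& ts, size_t ts_size) {
      rocksdb::WriteBatch batch(/*reserved_bytes=*/0, /*max_bytes=*/0,
                                /*protection_bytes_per_key=*/0,
                                /*default_cf_ts_sz=*/ts_size);
      rocksdb::Status s = batch.Put(key, value);
      if (!s.ok()) return s;
      // Stamp every key in the batch with the same timestamp; the callback
      // reports the timestamp size for each column family id.
      s = batch.UpdateTimestamps(ts, [ts_size](uint32_t) { return ts_size; });
      if (!s.ok()) return s;
      return db->Write(rocksdb::WriteOptions(), &batch);
    }

This mirrors the benchmark's own pattern: the timestamp is assigned once per batch, instead of plumbing it through `write_options_.timestamp` as the removed lines did.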
@@ -4860,7 +5378,7 @@ class Benchmark {
     }
     if (levelMeta.level == 0) {
       for (auto& fileMeta : levelMeta.files) {
-        fprintf(stdout, "Level[%d]: %s(size: %"
+        fprintf(stdout, "Level[%d]: %s(size: %" PRIi64 " bytes)\n",
                 levelMeta.level, fileMeta.name.c_str(), fileMeta.size);
       }
     } else {
@@ -4901,8 +5419,7 @@ class Benchmark {
   }

   void ReadSequential(ThreadState* thread, DB* db) {
-    ReadOptions options
-    options.tailing = FLAGS_use_tailing_iterator;
+    ReadOptions options = read_options_;
     std::unique_ptr<char[]> ts_guard;
     Slice ts;
     if (user_timestamp_size_ > 0) {
@@ -4911,6 +5428,9 @@ class Benchmark {
       options.timestamp = &ts;
     }

+    options.adaptive_readahead = FLAGS_adaptive_readahead;
+    options.async_io = FLAGS_async_io;
+
     Iterator* iter = db->NewIterator(options);
     int64_t i = 0;
     int64_t bytes = 0;
@@ -4940,7 +5460,6 @@ class Benchmark {
     int64_t found = 0;
     int64_t bytes = 0;
     int64_t key_rand = 0;
-    ReadOptions options(FLAGS_verify_checksum, true);
     std::unique_ptr<const char[]> key_guard;
     Slice key = AllocateKey(&key_guard);
     PinnableSlice pinnable_val;
@@ -4955,11 +5474,11 @@ class Benchmark {
       read++;
       Status s;
       if (FLAGS_num_column_families > 1) {
-        s = db_with_cfh->db->Get(
-            &pinnable_val);
+        s = db_with_cfh->db->Get(read_options_, db_with_cfh->GetCfh(key_rand),
+                                 key, &pinnable_val);
       } else {
         pinnable_val.Reset();
-        s = db_with_cfh->db->Get(
+        s = db_with_cfh->db->Get(read_options_,
                                  db_with_cfh->db->DefaultColumnFamily(), key,
                                  &pinnable_val);
       }
@@ -5005,7 +5524,7 @@ class Benchmark {
   }

   void ReadReverse(ThreadState* thread, DB* db) {
-    Iterator* iter = db->NewIterator(
+    Iterator* iter = db->NewIterator(read_options_);
     int64_t i = 0;
     int64_t bytes = 0;
     for (iter->SeekToLast(); i < reads_ && iter->Valid(); iter->Prev()) {
@@ -5027,7 +5546,7 @@ class Benchmark {
     int64_t read = 0;
     int64_t found = 0;
     int64_t nonexist = 0;
-    ReadOptions options
+    ReadOptions options = read_options_;
     std::unique_ptr<const char[]> key_guard;
     Slice key = AllocateKey(&key_guard);
     std::string value;
@@ -5117,10 +5636,16 @@ class Benchmark {
     int64_t bytes = 0;
     int num_keys = 0;
     int64_t key_rand = 0;
-    ReadOptions options
+    ReadOptions options = read_options_;
     std::unique_ptr<const char[]> key_guard;
     Slice key = AllocateKey(&key_guard);
     PinnableSlice pinnable_val;
+    std::vector<PinnableSlice> pinnable_vals;
+    if (read_operands_) {
+      // Start off with a small-ish value that'll be increased later if
+      // `GetMergeOperands()` tells us it is not large enough.
+      pinnable_vals.resize(8);
+    }
     std::unique_ptr<char[]> ts_guard;
     Slice ts;
     if (user_timestamp_size_ > 0) {
@@ -5157,18 +5682,46 @@ class Benchmark {
         ts_ptr = &ts_ret;
       }
       Status s;
+      pinnable_val.Reset();
+      for (size_t i = 0; i < pinnable_vals.size(); ++i) {
+        pinnable_vals[i].Reset();
+      }
+      ColumnFamilyHandle* cfh;
       if (FLAGS_num_column_families > 1) {
-
-            &pinnable_val, ts_ptr);
+        cfh = db_with_cfh->GetCfh(key_rand);
       } else {
-
-
-
-
+        cfh = db_with_cfh->db->DefaultColumnFamily();
+      }
+      if (read_operands_) {
+        GetMergeOperandsOptions get_merge_operands_options;
+        get_merge_operands_options.expected_max_number_of_operands =
+            static_cast<int>(pinnable_vals.size());
+        int number_of_operands;
+        s = db_with_cfh->db->GetMergeOperands(
+            options, cfh, key, pinnable_vals.data(),
+            &get_merge_operands_options, &number_of_operands);
+        if (s.IsIncomplete()) {
+          // Should only happen a few times when we encounter a key that had
+          // more merge operands than any key seen so far. Production use case
+          // would typically retry in such event to get all the operands so do
+          // that here.
+          pinnable_vals.resize(number_of_operands);
+          get_merge_operands_options.expected_max_number_of_operands =
+              static_cast<int>(pinnable_vals.size());
+          s = db_with_cfh->db->GetMergeOperands(
+              options, cfh, key, pinnable_vals.data(),
+              &get_merge_operands_options, &number_of_operands);
+        }
+      } else {
+        s = db_with_cfh->db->Get(options, cfh, key, &pinnable_val, ts_ptr);
       }
+
       if (s.ok()) {
         found++;
         bytes += key.size() + pinnable_val.size() + user_timestamp_size_;
+        for (size_t i = 0; i < pinnable_vals.size(); ++i) {
+          bytes += pinnable_vals[i].size();
+        }
       } else if (!s.IsNotFound()) {
         fprintf(stderr, "Get returned an error: %s\n", s.ToString().c_str());
         abort();
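The read path added in the @@ -5157 hunk calls `DB::GetMergeOperands()` and retries once when the initial operand buffer turns out to be too small (`Status::IsIncomplete()`). A hedged, self-contained sketch of the same retry pattern; `ReadOperands` is an illustrative helper, not part of this diff, and assumes the DB was opened with a merge operator:

    #include <vector>
    #include <rocksdb/db.h>

    rocksdb::Status ReadOperands(rocksdb::DB* db, const rocksdb::Slice& key,
                                 std::vector<rocksdb::PinnableSlice>* operands) {
      operands->resize(8);  // small initial guess, like the benchmark above
      rocksdb::GetMergeOperandsOptions opts;
      opts.expected_max_number_of_operands = static_cast<int>(operands->size());
      int count = 0;
      rocksdb::Status s = db->GetMergeOperands(
          rocksdb::ReadOptions(), db->DefaultColumnFamily(), key,
          operands->data(), &opts, &count);
      if (s.IsIncomplete()) {
        // The key had more operands than we budgeted for; grow and retry once.
        operands->resize(count);
        opts.expected_max_number_of_operands = count;
        s = db->GetMergeOperands(rocksdb::ReadOptions(),
                                 db->DefaultColumnFamily(), key,
                                 operands->data(), &opts, &count);
      }
      if (s.ok()) operands->resize(count);
      return s;
    }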
@@ -5200,9 +5753,10 @@ class Benchmark {
   // Returns the total number of keys found.
   void MultiReadRandom(ThreadState* thread) {
     int64_t read = 0;
+    int64_t bytes = 0;
     int64_t num_multireads = 0;
     int64_t found = 0;
-    ReadOptions options
+    ReadOptions options = read_options_;
     std::vector<Slice> keys;
     std::vector<std::unique_ptr<const char[]> > key_guards;
     std::vector<std::string> values(entries_per_batch_);
@@ -5250,6 +5804,7 @@ class Benchmark {
       num_multireads++;
       for (int64_t i = 0; i < entries_per_batch_; ++i) {
         if (statuses[i].ok()) {
+          bytes += keys[i].size() + values[i].size() + user_timestamp_size_;
           ++found;
         } else if (!statuses[i].IsNotFound()) {
           fprintf(stderr, "MultiGet returned an error: %s\n",
@@ -5265,6 +5820,8 @@ class Benchmark {
       num_multireads++;
       for (int64_t i = 0; i < entries_per_batch_; ++i) {
         if (stat_list[i].ok()) {
+          bytes +=
+              keys[i].size() + pin_values[i].size() + user_timestamp_size_;
           ++found;
         } else if (!stat_list[i].IsNotFound()) {
           fprintf(stderr, "MultiGet returned an error: %s\n",
@@ -5287,6 +5844,7 @@ class Benchmark {
     char msg[100];
     snprintf(msg, sizeof(msg), "(%" PRIu64 " of %" PRIu64 " found)",
              found, read);
+    thread->stats.AddBytes(bytes);
     thread->stats.AddMessage(msg);
   }

@@ -5547,21 +6105,22 @@ class Benchmark {
     }
   };

-  // The social graph
+  // The social graph workload mixed with Get, Put, Iterator queries.
   // The value size and iterator length follow Pareto distribution.
   // The overall key access follow power distribution. If user models the
   // workload based on different key-ranges (or different prefixes), user
   // can use two-term-exponential distribution to fit the workload. User
-  // needs to
+  // needs to decide the ratio between Get, Put, Iterator queries before
   // starting the benchmark.
   void MixGraph(ThreadState* thread) {
-    int64_t read = 0;  // including single gets and Next of iterators
     int64_t gets = 0;
     int64_t puts = 0;
-    int64_t
+    int64_t get_found = 0;
     int64_t seek = 0;
     int64_t seek_found = 0;
     int64_t bytes = 0;
+    double total_scan_length = 0;
+    double total_val_size = 0;
     const int64_t default_value_max = 1 * 1024 * 1024;
     int64_t value_max = default_value_max;
     int64_t scan_len_max = FLAGS_mix_max_scan_len;
@@ -5580,17 +6139,15 @@ class Benchmark {
       value_max = FLAGS_mix_max_value_size;
     }

-    ReadOptions options(FLAGS_verify_checksum, true);
     std::unique_ptr<const char[]> key_guard;
     Slice key = AllocateKey(&key_guard);
     PinnableSlice pinnable_val;
     query.Initiate(ratio);

     // the limit of qps initiation
-    if (
-      thread->shared->read_rate_limiter.reset(
-          static_cast<int64_t>(read_rate)
-          RateLimiter::Mode::kReadsOnly));
+    if (FLAGS_sine_mix_rate) {
+      thread->shared->read_rate_limiter.reset(
+          NewGenericRateLimiter(static_cast<int64_t>(read_rate)));
       thread->shared->write_rate_limiter.reset(
           NewGenericRateLimiter(static_cast<int64_t>(write_rate)));
     }
@@ -5638,52 +6195,51 @@ class Benchmark {
         usecs_since_last = 0;
       }

-      if (
-
+      if (FLAGS_sine_mix_rate &&
+          usecs_since_last >
+              (FLAGS_sine_mix_rate_interval_milliseconds * uint64_t{1000})) {
         double usecs_since_start =
             static_cast<double>(now - thread->stats.GetStart());
         thread->stats.ResetSineInterval();
         double mix_rate_with_noise = AddNoise(
             SineRate(usecs_since_start / 1000000.0), FLAGS_sine_mix_rate_noise);
         read_rate = mix_rate_with_noise * (query.ratio_[0] + query.ratio_[2]);
-        write_rate =
-            mix_rate_with_noise * query.ratio_[1] * FLAGS_mix_ave_kv_size;
+        write_rate = mix_rate_with_noise * query.ratio_[1];

-
-
-
-
-
-
+        if (read_rate > 0) {
+          thread->shared->read_rate_limiter->SetBytesPerSecond(
+              static_cast<int64_t>(read_rate));
+        }
+        if (write_rate > 0) {
+          thread->shared->write_rate_limiter->SetBytesPerSecond(
+              static_cast<int64_t>(write_rate));
+        }
       }
       // Start the query
       if (query_type == 0) {
         // the Get query
         gets++;
-        read++;
         if (FLAGS_num_column_families > 1) {
-          s = db_with_cfh->db->Get(
-              &pinnable_val);
+          s = db_with_cfh->db->Get(read_options_, db_with_cfh->GetCfh(key_rand),
+                                   key, &pinnable_val);
         } else {
           pinnable_val.Reset();
-          s = db_with_cfh->db->Get(
+          s = db_with_cfh->db->Get(read_options_,
                                    db_with_cfh->db->DefaultColumnFamily(), key,
                                    &pinnable_val);
         }

         if (s.ok()) {
-
+          get_found++;
           bytes += key.size() + pinnable_val.size();
         } else if (!s.IsNotFound()) {
           fprintf(stderr, "Get returned an error: %s\n", s.ToString().c_str());
           abort();
         }

-        if (thread->shared->read_rate_limiter
-
-
-            256, Env::IO_HIGH, nullptr /* stats */,
-            RateLimiter::OpType::kRead);
+        if (thread->shared->read_rate_limiter && (gets + seek) % 100 == 0) {
+          thread->shared->read_rate_limiter->Request(100, Env::IO_HIGH,
+                                                     nullptr /*stats*/);
         }
         thread->stats.FinishedOps(db_with_cfh, db_with_cfh->db, 1, kRead);
       } else if (query_type == 1) {
@@ -5691,11 +6247,13 @@ class Benchmark {
         puts++;
         int64_t val_size = ParetoCdfInversion(
             u, FLAGS_value_theta, FLAGS_value_k, FLAGS_value_sigma);
-        if (val_size <
+        if (val_size < 10) {
           val_size = 10;
         } else if (val_size > value_max) {
           val_size = val_size % value_max;
         }
+        total_val_size += val_size;
+
         s = db_with_cfh->db->Put(
             write_options_, key,
             gen.Generate(static_cast<unsigned int>(val_size)));
@@ -5704,21 +6262,19 @@ class Benchmark {
           ErrorExit();
         }

-        if (thread->shared->write_rate_limiter) {
-          thread->shared->write_rate_limiter->Request(
-
-              RateLimiter::OpType::kWrite);
+        if (thread->shared->write_rate_limiter && puts % 100 == 0) {
+          thread->shared->write_rate_limiter->Request(100, Env::IO_HIGH,
+                                                      nullptr /*stats*/);
         }
         thread->stats.FinishedOps(db_with_cfh, db_with_cfh->db, 1, kWrite);
       } else if (query_type == 2) {
         // Seek query
         if (db_with_cfh->db != nullptr) {
           Iterator* single_iter = nullptr;
-          single_iter = db_with_cfh->db->NewIterator(
+          single_iter = db_with_cfh->db->NewIterator(read_options_);
           if (single_iter != nullptr) {
             single_iter->Seek(key);
             seek++;
-            read++;
             if (single_iter->Valid() && single_iter->key().compare(key) == 0) {
               seek_found++;
             }
@@ -5733,6 +6289,7 @@ class Benchmark {
               bytes += single_iter->key().size() + single_iter->value().size();
               single_iter->Next();
               assert(single_iter->status().ok());
+              total_scan_length++;
             }
           }
           delete single_iter;
@@ -5742,9 +6299,12 @@ class Benchmark {
     }
     char msg[256];
     snprintf(msg, sizeof(msg),
-             "( Gets:%" PRIu64 " Puts:%" PRIu64 " Seek:%" PRIu64
-             "
-
+             "( Gets:%" PRIu64 " Puts:%" PRIu64 " Seek:%" PRIu64
+             ", reads %" PRIu64 " in %" PRIu64
+             " found, "
+             "avg size: %.1f value, %.1f scan)\n",
+             gets, puts, seek, get_found + seek_found, gets + seek,
+             total_val_size / puts, total_scan_length / seek);

     thread->stats.AddBytes(bytes);
     thread->stats.AddMessage(msg);
@@ -5757,7 +6317,7 @@ class Benchmark {

   void IteratorCreation(ThreadState* thread) {
     Duration duration(FLAGS_duration, reads_);
-    ReadOptions options
+    ReadOptions options = read_options_;
     std::unique_ptr<char[]> ts_guard;
     if (user_timestamp_size_ > 0) {
       ts_guard.reset(new char[user_timestamp_size_]);
@@ -5787,11 +6347,7 @@ class Benchmark {
     int64_t read = 0;
     int64_t found = 0;
     int64_t bytes = 0;
-    ReadOptions options
-    options.total_order_seek = FLAGS_total_order_seek;
-    options.prefix_same_as_start = FLAGS_prefix_same_as_start;
-    options.tailing = FLAGS_use_tailing_iterator;
-    options.readahead_size = FLAGS_readahead_size;
+    ReadOptions options = read_options_;
     std::unique_ptr<char[]> ts_guard;
     Slice ts;
     if (user_timestamp_size_ > 0) {
@@ -5800,13 +6356,14 @@ class Benchmark {
       options.timestamp = &ts;
     }

-    Iterator
-
-
-
-
-
-
+    std::vector<Iterator*> tailing_iters;
+    if (FLAGS_use_tailing_iterator) {
+      if (db_.db != nullptr) {
+        tailing_iters.push_back(db_.db->NewIterator(options));
+      } else {
+        for (const auto& db_with_cfh : multi_dbs_) {
+          tailing_iters.push_back(db_with_cfh.db->NewIterator(options));
+        }
       }
     }

@@ -5840,24 +6397,22 @@ class Benchmark {
       }
     }

-
+      // Pick a Iterator to use
+      uint64_t db_idx_to_use =
+          (db_.db == nullptr)
+              ? (uint64_t{thread->rand.Next()} % multi_dbs_.size())
+              : 0;
+      std::unique_ptr<Iterator> single_iter;
+      Iterator* iter_to_use;
+      if (FLAGS_use_tailing_iterator) {
+        iter_to_use = tailing_iters[db_idx_to_use];
+      } else {
        if (db_.db != nullptr) {
-
-        single_iter = db_.db->NewIterator(options);
+          single_iter.reset(db_.db->NewIterator(options));
        } else {
-
-          delete iter;
-        }
-        multi_iters.clear();
-        for (const auto& db_with_cfh : multi_dbs_) {
-          multi_iters.push_back(db_with_cfh.db->NewIterator(options));
-        }
+          single_iter.reset(multi_dbs_[db_idx_to_use].db->NewIterator(options));
        }
-
-      // Pick a Iterator to use
-      Iterator* iter_to_use = single_iter;
-      if (single_iter == nullptr) {
-        iter_to_use = multi_iters[thread->rand.Next() % multi_iters.size()];
+        iter_to_use = single_iter.get();
      }

      iter_to_use->Seek(key);
@@ -5889,8 +6444,7 @@ class Benchmark {

       thread->stats.FinishedOps(&db_, db_.db, 1, kSeek);
     }
-
-    for (auto iter : multi_iters) {
+    for (auto iter : tailing_iters) {
       delete iter;
     }

@@ -5923,7 +6477,7 @@ class Benchmark {

   void DoDelete(ThreadState* thread, bool seq) {
     WriteBatch batch(/*reserved_bytes=*/0, /*max_bytes=*/0,
-                     user_timestamp_size_);
+                     /*protection_bytes_per_key=*/0, user_timestamp_size_);
     Duration duration(seq ? 0 : FLAGS_duration, deletes_);
     int64_t i = 0;
     std::unique_ptr<const char[]> key_guard;
@@ -5945,7 +6499,8 @@ class Benchmark {
       Status s;
       if (user_timestamp_size_ > 0) {
         ts = mock_app_clock_->Allocate(ts_guard.get());
-        s = batch.
+        s = batch.UpdateTimestamps(
+            ts, [this](uint32_t) { return user_timestamp_size_; });
         if (!s.ok()) {
           fprintf(stderr, "assign timestamp: %s\n", s.ToString().c_str());
           ErrorExit();
@@ -6039,17 +6594,17 @@ class Benchmark {
     Slice ts;
     if (user_timestamp_size_ > 0) {
       ts = mock_app_clock_->Allocate(ts_guard.get());
-      write_options_.timestamp = &ts;
     }
     if (write_merge == kWrite) {
-
+      if (user_timestamp_size_ == 0) {
+        s = db->Put(write_options_, key, val);
+      } else {
+        s = db->Put(write_options_, key, ts, val);
+      }
     } else {
       s = db->Merge(write_options_, key, val);
     }
     // Restore write_options_
-    if (user_timestamp_size_ > 0) {
-      write_options_.timestamp = nullptr;
-    }
     written++;

     if (!s.ok()) {
@@ -6082,7 +6637,7 @@ class Benchmark {
       abort();
     }
     assert(db_.db != nullptr);
-    ReadOptions read_options;
+    ReadOptions read_options = read_options_;
     std::unique_ptr<char[]> ts_guard;
     Slice ts;
     if (user_timestamp_size_ > 0) {
@@ -6122,7 +6677,7 @@ class Benchmark {
     std::string keys[3];

     WriteBatch batch(/*reserved_bytes=*/0, /*max_bytes=*/0,
-                     user_timestamp_size_);
+                     /*protection_bytes_per_key=*/0, user_timestamp_size_);
     Status s;
     for (int i = 0; i < 3; i++) {
       keys[i] = key.ToString() + suffixes[i];
@@ -6133,7 +6688,8 @@ class Benchmark {
     if (user_timestamp_size_ > 0) {
       ts_guard.reset(new char[user_timestamp_size_]);
       Slice ts = mock_app_clock_->Allocate(ts_guard.get());
-      s = batch.
+      s = batch.UpdateTimestamps(
+          ts, [this](uint32_t) { return user_timestamp_size_; });
       if (!s.ok()) {
         fprintf(stderr, "assign timestamp to batch: %s\n",
                 s.ToString().c_str());
@@ -6153,7 +6709,8 @@ class Benchmark {
     std::string suffixes[3] = {"1", "2", "0"};
     std::string keys[3];

-    WriteBatch batch(0, 0,
+    WriteBatch batch(0, 0, /*protection_bytes_per_key=*/0,
+                     user_timestamp_size_);
     Status s;
     for (int i = 0; i < 3; i++) {
       keys[i] = key.ToString() + suffixes[i];
@@ -6164,7 +6721,8 @@ class Benchmark {
     if (user_timestamp_size_ > 0) {
       ts_guard.reset(new char[user_timestamp_size_]);
       Slice ts = mock_app_clock_->Allocate(ts_guard.get());
-      s = batch.
+      s = batch.UpdateTimestamps(
+          ts, [this](uint32_t) { return user_timestamp_size_; });
       if (!s.ok()) {
         fprintf(stderr, "assign timestamp to batch: %s\n",
                 s.ToString().c_str());
@@ -6179,13 +6737,12 @@ class Benchmark {
   // Given a key K and value V, this gets values for K+"0", K+"1" and K+"2"
   // in the same snapshot, and verifies that all the values are identical.
   // ASSUMES that PutMany was used to put (K, V) into the DB.
-  Status GetMany(DB* db, const
-                 std::string* value) {
+  Status GetMany(DB* db, const Slice& key, std::string* value) {
     std::string suffixes[3] = {"0", "1", "2"};
     std::string keys[3];
     Slice key_slices[3];
     std::string values[3];
-    ReadOptions readoptionscopy =
+    ReadOptions readoptionscopy = read_options_;

     std::unique_ptr<char[]> ts_guard;
     Slice ts;
@@ -6233,7 +6790,6 @@ class Benchmark {
   // FLAGS_numdistinct distinct keys instead of FLAGS_num distinct keys.
   // (d) Does not have a MultiGet option.
   void RandomWithVerify(ThreadState* thread) {
-    ReadOptions options(FLAGS_verify_checksum, true);
     RandomGenerator gen;
     std::string value;
     int64_t found = 0;
@@ -6260,7 +6816,7 @@ class Benchmark {
                          FLAGS_numdistinct, &key);
       if (get_weight > 0) {
         // do all the gets first
-        Status s = GetMany(db,
+        Status s = GetMany(db, key, &value);
         if (!s.ok() && !s.IsNotFound()) {
           fprintf(stderr, "getmany error: %s\n", s.ToString().c_str());
           // we continue after error rather than exiting so that we can
@@ -6304,7 +6860,7 @@ class Benchmark {
   // This is different from ReadWhileWriting because it does not use
   // an extra thread.
   void ReadRandomWriteRandom(ThreadState* thread) {
-    ReadOptions options
+    ReadOptions options = read_options_;
     RandomGenerator gen;
     std::string value;
     int64_t found = 0;
@@ -6353,12 +6909,13 @@ class Benchmark {
       } else if (put_weight > 0) {
         // then do all the corresponding number of puts
         // for all the gets we have done earlier
-
+        Status s;
         if (user_timestamp_size_ > 0) {
-          ts = mock_app_clock_->Allocate(ts_guard.get());
-
+          Slice ts = mock_app_clock_->Allocate(ts_guard.get());
+          s = db->Put(write_options_, key, ts, gen.Generate());
+        } else {
+          s = db->Put(write_options_, key, gen.Generate());
         }
-        Status s = db->Put(write_options_, key, gen.Generate());
         if (!s.ok()) {
           fprintf(stderr, "put error: %s\n", s.ToString().c_str());
           ErrorExit();
@@ -6378,7 +6935,7 @@ class Benchmark {
   //
   // Read-modify-write for random keys
   void UpdateRandom(ThreadState* thread) {
-    ReadOptions options
+    ReadOptions options = read_options_;
     RandomGenerator gen;
     std::string value;
     int64_t found = 0;
@@ -6419,11 +6976,13 @@ class Benchmark {
       }

       Slice val = gen.Generate();
+      Status s;
       if (user_timestamp_size_ > 0) {
         ts = mock_app_clock_->Allocate(ts_guard.get());
-
+        s = db->Put(write_options_, key, ts, val);
+      } else {
+        s = db->Put(write_options_, key, val);
       }
-      Status s = db->Put(write_options_, key, val);
       if (!s.ok()) {
         fprintf(stderr, "put error: %s\n", s.ToString().c_str());
         exit(1);
@@ -6443,7 +7002,7 @@ class Benchmark {
   // representing the existing value, we generate an array B of the same size,
   // then compute C = A^B as C[i]=A[i]^B[i], and store C
   void XORUpdateRandom(ThreadState* thread) {
-    ReadOptions options
+    ReadOptions options = read_options_;
     RandomGenerator gen;
     std::string existing_value;
     int64_t found = 0;
@@ -6486,12 +7045,13 @@ class Benchmark {
         xor_operator.XOR(nullptr, value, &new_value);
       }

+      Status s;
       if (user_timestamp_size_ > 0) {
         ts = mock_app_clock_->Allocate(ts_guard.get());
-
+        s = db->Put(write_options_, key, ts, Slice(new_value));
+      } else {
+        s = db->Put(write_options_, key, Slice(new_value));
       }
-
-      Status s = db->Put(write_options_, key, Slice(new_value));
       if (!s.ok()) {
         fprintf(stderr, "put error: %s\n", s.ToString().c_str());
         ErrorExit();
@@ -6508,7 +7068,7 @@ class Benchmark {
   // Each operation causes the key grow by value_size (simulating an append).
   // Generally used for benchmarking against merges of similar type
   void AppendRandom(ThreadState* thread) {
-    ReadOptions options
+    ReadOptions options = read_options_;
     RandomGenerator gen;
     std::string value;
     int64_t found = 0;
@@ -6552,13 +7112,14 @@ class Benchmark {
       }
       value.append(operand.data(), operand.size());

+      Status s;
       if (user_timestamp_size_ > 0) {
         ts = mock_app_clock_->Allocate(ts_guard.get());
-
+        s = db->Put(write_options_, key, ts, value);
+      } else {
+        // Write back to the database
+        s = db->Put(write_options_, key, value);
       }
-
-      // Write back to the database
-      Status s = db->Put(write_options_, key, value);
       if (!s.ok()) {
         fprintf(stderr, "put error: %s\n", s.ToString().c_str());
         ErrorExit();
@@ -6631,7 +7192,6 @@ class Benchmark {
   // As with MergeRandom, the merge operator to use should be defined by
   // FLAGS_merge_operator.
   void ReadRandomMergeRandom(ThreadState* thread) {
-    ReadOptions options(FLAGS_verify_checksum, true);
     RandomGenerator gen;
     std::string value;
     int64_t num_hits = 0;
@@ -6658,7 +7218,7 @@ class Benchmark {
         num_merges++;
         thread->stats.FinishedOps(nullptr, db, 1, kMerge);
       } else {
-        Status s = db->Get(
+        Status s = db->Get(read_options_, key, &value);
         if (value.length() > max_length)
           max_length = value.length();

@@ -6689,7 +7249,7 @@ class Benchmark {
     thread->stats.Start(thread->tid);

     DB* db = SelectDB(thread);
-    ReadOptions read_opts
+    ReadOptions read_opts = read_options_;
     std::unique_ptr<char[]> ts_guard;
     Slice ts;
     if (user_timestamp_size_ > 0) {
@@ -6816,6 +7376,37 @@ class Benchmark {
   }

 #ifndef ROCKSDB_LITE
+  void VerifyChecksum(ThreadState* thread) {
+    DB* db = SelectDB(thread);
+    ReadOptions ro;
+    ro.adaptive_readahead = FLAGS_adaptive_readahead;
+    ro.async_io = FLAGS_async_io;
+    ro.rate_limiter_priority =
+        FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL;
+    ro.readahead_size = FLAGS_readahead_size;
+    Status s = db->VerifyChecksum(ro);
+    if (!s.ok()) {
+      fprintf(stderr, "VerifyChecksum() failed: %s\n", s.ToString().c_str());
+      exit(1);
+    }
+  }
+
+  void VerifyFileChecksums(ThreadState* thread) {
+    DB* db = SelectDB(thread);
+    ReadOptions ro;
+    ro.adaptive_readahead = FLAGS_adaptive_readahead;
+    ro.async_io = FLAGS_async_io;
+    ro.rate_limiter_priority =
+        FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL;
+    ro.readahead_size = FLAGS_readahead_size;
+    Status s = db->VerifyFileChecksums(ro);
+    if (!s.ok()) {
+      fprintf(stderr, "VerifyFileChecksums() failed: %s\n",
+              s.ToString().c_str());
+      exit(1);
+    }
+  }
+
   // This benchmark stress tests Transactions. For a given --duration (or
   // total number of --writes, a Transaction will perform a read-modify-write
   // to increment the value of a key in each of N(--transaction-sets) sets of
@@ -6829,9 +7420,7 @@ class Benchmark {
   // RandomTransactionVerify() will then validate the correctness of the results
   // by checking if the sum of all keys in each set is the same.
   void RandomTransaction(ThreadState* thread) {
-    ReadOptions options(FLAGS_verify_checksum, true);
     Duration duration(FLAGS_duration, readwrites_);
-    ReadOptions read_options(FLAGS_verify_checksum, true);
     uint16_t num_prefix_ranges = static_cast<uint16_t>(FLAGS_transaction_sets);
     uint64_t transactions_done = 0;

@@ -6845,7 +7434,7 @@ class Benchmark {
     txn_options.set_snapshot = FLAGS_transaction_set_snapshot;

     RandomTransactionInserter inserter(&thread->rand, write_options_,
-
+                                       read_options_, FLAGS_num,
                                        num_prefix_ranges);

     if (FLAGS_num_multi_db > 1) {
@@ -6937,12 +7526,12 @@ class Benchmark {
     DB* db = SelectDB(thread);
     for (int64_t i = 0; i < FLAGS_numdistinct; i++) {
       GenerateKeyFromInt(i * max_counter, FLAGS_num, &key);
-      Slice ts;
       if (user_timestamp_size_ > 0) {
-        ts = mock_app_clock_->Allocate(ts_guard.get());
-
+        Slice ts = mock_app_clock_->Allocate(ts_guard.get());
+        s = db->Put(write_options_, key, ts, gen.Generate());
+      } else {
+        s = db->Put(write_options_, key, gen.Generate());
       }
-      s = db->Put(write_options_, key, gen.Generate());
       if (!s.ok()) {
         fprintf(stderr, "Operation failed: %s\n", s.ToString().c_str());
         exit(1);
@@ -6961,22 +7550,24 @@ class Benchmark {
                        static_cast<int64_t>(0));
       GenerateKeyFromInt(key_id * max_counter + counters[key_id], FLAGS_num,
                          &key);
-      Slice ts;
       if (user_timestamp_size_ > 0) {
-        ts = mock_app_clock_->Allocate(ts_guard.get());
-
+        Slice ts = mock_app_clock_->Allocate(ts_guard.get());
+        s = FLAGS_use_single_deletes ? db->SingleDelete(write_options_, key, ts)
+                                     : db->Delete(write_options_, key, ts);
+      } else {
+        s = FLAGS_use_single_deletes ? db->SingleDelete(write_options_, key)
+                                     : db->Delete(write_options_, key);
       }
-      s = FLAGS_use_single_deletes ? db->SingleDelete(write_options_, key)
-                                   : db->Delete(write_options_, key);
       if (s.ok()) {
         counters[key_id] = (counters[key_id] + 1) % max_counter;
         GenerateKeyFromInt(key_id * max_counter + counters[key_id], FLAGS_num,
                            &key);
         if (user_timestamp_size_ > 0) {
-          ts = mock_app_clock_->Allocate(ts_guard.get());
-
+          Slice ts = mock_app_clock_->Allocate(ts_guard.get());
+          s = db->Put(write_options_, key, ts, Slice());
+        } else {
+          s = db->Put(write_options_, key, Slice());
         }
-        s = db->Put(write_options_, key, Slice());
       }

       if (!s.ok()) {
@@ -6996,7 +7587,6 @@ class Benchmark {
   }

   void TimeSeriesReadOrDelete(ThreadState* thread, bool do_deletion) {
-    ReadOptions options(FLAGS_verify_checksum, true);
     int64_t read = 0;
     int64_t found = 0;
     int64_t bytes = 0;
@@ -7004,7 +7594,7 @@ class Benchmark {
     Iterator* iter = nullptr;
     // Only work on single database
     assert(db_.db != nullptr);
-    iter = db_.db->NewIterator(
+    iter = db_.db->NewIterator(read_options_);

     std::unique_ptr<const char[]> key_guard;
     Slice key = AllocateKey(&key_guard);
@@ -7020,7 +7610,7 @@ class Benchmark {
     }
     if (!FLAGS_use_tailing_iterator) {
       delete iter;
-      iter = db_.db->NewIterator(
+      iter = db_.db->NewIterator(read_options_);
     }
     // Pick a Iterator to use

@@ -7166,6 +7756,167 @@ class Benchmark {
     }
   }

+#ifndef ROCKSDB_LITE
+  void WaitForCompactionHelper(DBWithColumnFamilies& db) {
+    // This is an imperfect way of waiting for compaction. The loop and sleep
+    // is done because a thread that finishes a compaction job should get a
+    // chance to pickup a new compaction job.
+
+    std::vector<std::string> keys = {DB::Properties::kMemTableFlushPending,
+                                     DB::Properties::kNumRunningFlushes,
+                                     DB::Properties::kCompactionPending,
+                                     DB::Properties::kNumRunningCompactions};
+
+    fprintf(stdout, "waitforcompaction(%s): started\n",
+            db.db->GetName().c_str());
+
+    while (true) {
+      bool retry = false;
+
+      for (const auto& k : keys) {
+        uint64_t v;
+        if (!db.db->GetIntProperty(k, &v)) {
+          fprintf(stderr, "waitforcompaction(%s): GetIntProperty(%s) failed\n",
+                  db.db->GetName().c_str(), k.c_str());
+          exit(1);
+        } else if (v > 0) {
+          fprintf(stdout,
+                  "waitforcompaction(%s): active(%s). Sleep 10 seconds\n",
+                  db.db->GetName().c_str(), k.c_str());
+          FLAGS_env->SleepForMicroseconds(10 * 1000000);
+          retry = true;
+          break;
+        }
+      }
+
+      if (!retry) {
+        fprintf(stdout, "waitforcompaction(%s): finished\n",
+                db.db->GetName().c_str());
+        return;
+      }
+    }
+  }
+
+  void WaitForCompaction() {
+    // Give background threads a chance to wake
+    FLAGS_env->SleepForMicroseconds(5 * 1000000);
+
+    // I am skeptical that this check race free. I hope that checking twice
+    // reduces the chance.
+    if (db_.db != nullptr) {
+      WaitForCompactionHelper(db_);
+      WaitForCompactionHelper(db_);
+    } else {
+      for (auto& db_with_cfh : multi_dbs_) {
+        WaitForCompactionHelper(db_with_cfh);
+        WaitForCompactionHelper(db_with_cfh);
+      }
+    }
+  }
+
+  bool CompactLevelHelper(DBWithColumnFamilies& db_with_cfh, int from_level) {
+    std::vector<LiveFileMetaData> files;
+    db_with_cfh.db->GetLiveFilesMetaData(&files);
+
+    assert(from_level == 0 || from_level == 1);
+
+    int real_from_level = from_level;
+    if (real_from_level > 0) {
+      // With dynamic leveled compaction the first level with data beyond L0
+      // might not be L1.
+      real_from_level = std::numeric_limits<int>::max();
+
+      for (auto& f : files) {
+        if (f.level > 0 && f.level < real_from_level) real_from_level = f.level;
+      }
+
+      if (real_from_level == std::numeric_limits<int>::max()) {
+        fprintf(stdout, "compact%d found 0 files to compact\n", from_level);
+        return true;
+      }
+    }
+
+    // The goal is to compact from from_level to the level that follows it,
+    // and with dynamic leveled compaction the next level might not be
+    // real_from_level+1
+    int next_level = std::numeric_limits<int>::max();
+
+    std::vector<std::string> files_to_compact;
+    for (auto& f : files) {
+      if (f.level == real_from_level)
+        files_to_compact.push_back(f.name);
+      else if (f.level > real_from_level && f.level < next_level)
+        next_level = f.level;
+    }
+
+    if (files_to_compact.empty()) {
+      fprintf(stdout, "compact%d found 0 files to compact\n", from_level);
+      return true;
+    } else if (next_level == std::numeric_limits<int>::max()) {
+      // There is no data beyond real_from_level. So we are done.
+      fprintf(stdout, "compact%d found no data beyond L%d\n", from_level,
+              real_from_level);
+      return true;
+    }
+
+    fprintf(stdout, "compact%d found %d files to compact from L%d to L%d\n",
+            from_level, static_cast<int>(files_to_compact.size()),
+            real_from_level, next_level);
+
+    ROCKSDB_NAMESPACE::CompactionOptions options;
+    // Lets RocksDB use the configured compression for this level
+    options.compression = ROCKSDB_NAMESPACE::kDisableCompressionOption;
+
+    ROCKSDB_NAMESPACE::ColumnFamilyDescriptor cfDesc;
+    db_with_cfh.db->DefaultColumnFamily()->GetDescriptor(&cfDesc);
+    options.output_file_size_limit = cfDesc.options.target_file_size_base;
+
+    Status status =
+        db_with_cfh.db->CompactFiles(options, files_to_compact, next_level);
+    if (!status.ok()) {
+      // This can fail for valid reasons including the operation was aborted
+      // or a filename is invalid because background compaction removed it.
+      // Having read the current cases for which an error is raised I prefer
+      // not to figure out whether an exception should be thrown here.
+      fprintf(stderr, "compact%d CompactFiles failed: %s\n", from_level,
+              status.ToString().c_str());
+      return false;
+    }
+    return true;
+  }
+
+  void CompactLevel(int from_level) {
+    if (db_.db != nullptr) {
+      while (!CompactLevelHelper(db_, from_level)) WaitForCompaction();
+    }
+    for (auto& db_with_cfh : multi_dbs_) {
+      while (!CompactLevelHelper(db_with_cfh, from_level)) WaitForCompaction();
+    }
+  }
+#endif
+
+  void Flush() {
+    FlushOptions flush_opt;
+    flush_opt.wait = true;
+
+    if (db_.db != nullptr) {
+      Status s = db_.db->Flush(flush_opt, db_.cfh);
+      if (!s.ok()) {
+        fprintf(stderr, "Flush failed: %s\n", s.ToString().c_str());
+        exit(1);
+      }
+    } else {
+      for (const auto& db_with_cfh : multi_dbs_) {
+        Status s = db_with_cfh.db->Flush(flush_opt, db_with_cfh.cfh);
+        if (!s.ok()) {
+          fprintf(stderr, "Flush failed: %s\n", s.ToString().c_str());
+          exit(1);
+        }
+      }
+    }
+    fprintf(stdout, "flush memtable\n");
+  }
+
   void ResetStats() {
     if (db_.db != nullptr) {
       db_.db->ResetStats();
@@ -7228,6 +7979,32 @@ class Benchmark {
     fprintf(stdout, "\n%s\n", stats.c_str());
   }

+  void PrintStats(const std::vector<std::string>& keys) {
+    if (db_.db != nullptr) {
+      PrintStats(db_.db, keys);
+    }
+    for (const auto& db_with_cfh : multi_dbs_) {
+      PrintStats(db_with_cfh.db, keys, true);
+    }
+  }
+
+  void PrintStats(DB* db, const std::vector<std::string>& keys,
+                  bool print_header = false) {
+    if (print_header) {
+      fprintf(stdout, "\n==== DB: %s ===\n", db->GetName().c_str());
+    }
+
+    for (const auto& key : keys) {
+      std::string stats;
+      if (!db->GetProperty(key, &stats)) {
+        stats = "(failed)";
+      }
+      fprintf(stdout, "%s: %s\n", key.c_str(), stats.c_str());
+    }
+  }
+
+#ifndef ROCKSDB_LITE
+
   void Replay(ThreadState* thread) {
     if (db_.db != nullptr) {
       Replay(thread, &db_);
@@ -7247,24 +8024,40 @@ class Benchmark {
              s.ToString().c_str());
      exit(1);
    }
-    Replayer replayer
-
-
-
-
-
+    std::unique_ptr<Replayer> replayer;
+    s = db_with_cfh->db->NewDefaultReplayer(db_with_cfh->cfh,
+                                            std::move(trace_reader), &replayer);
+    if (!s.ok()) {
+      fprintf(stderr,
+              "Encountered an error creating a default Replayer. "
+              "Error: %s\n",
+              s.ToString().c_str());
+      exit(1);
+    }
+    s = replayer->Prepare();
+    if (!s.ok()) {
+      fprintf(stderr, "Prepare for replay failed. Error: %s\n",
+              s.ToString().c_str());
+    }
+    s = replayer->Replay(
+        ReplayOptions(static_cast<uint32_t>(FLAGS_trace_replay_threads),
+                      FLAGS_trace_replay_fast_forward),
+        nullptr);
+    replayer.reset();
    if (s.ok()) {
-      fprintf(stdout, "Replay
+      fprintf(stdout, "Replay completed from trace_file: %s\n",
              FLAGS_trace_file.c_str());
    } else {
-      fprintf(stderr, "
-              s.ToString().c_str());
+      fprintf(stderr, "Replay failed. Error: %s\n", s.ToString().c_str());
    }
  }
+
+#endif  // ROCKSDB_LITE
 };

 int db_bench_tool(int argc, char** argv) {
   ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
+  ConfigOptions config_options;
   static bool initialized = false;
   if (!initialized) {
     SetUsageMessage(std::string("\nUSAGE:\n") + std::string(argv[0]) +
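The Replay() rewrite in the hunk above moves to the Replayer interface (NewDefaultReplayer(), Prepare(), Replay()). A rough, hedged sketch of that flow for a trace recorded earlier (for example with db_bench tracing or DB::StartTrace()); the `ReplayTrace` helper and the file-reader setup are illustrative assumptions, not part of this diff:

    #include <rocksdb/db.h>
    #include <rocksdb/trace_reader_writer.h>
    #include <rocksdb/utilities/replayer.h>

    rocksdb::Status ReplayTrace(rocksdb::DB* db, const std::string& trace_file) {
      std::unique_ptr<rocksdb::TraceReader> reader;
      rocksdb::Status s = rocksdb::NewFileTraceReader(
          db->GetEnv(), rocksdb::EnvOptions(), trace_file, &reader);
      if (!s.ok()) return s;
      std::unique_ptr<rocksdb::Replayer> replayer;
      s = db->NewDefaultReplayer({db->DefaultColumnFamily()}, std::move(reader),
                                 &replayer);
      if (!s.ok()) return s;
      s = replayer->Prepare();
      if (!s.ok()) return s;
      // One replay thread, no fast-forward, no per-record result callback.
      return replayer->Replay(rocksdb::ReplayOptions(/*num_threads=*/1,
                                                     /*fast_forward=*/1.0),
                              nullptr);
    }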
@@ -7281,8 +8074,8 @@ int db_bench_tool(int argc, char** argv) {
     exit(1);
   }
   if (!FLAGS_statistics_string.empty()) {
-    Status s =
-
+    Status s = Statistics::CreateFromString(config_options,
+                                            FLAGS_statistics_string, &dbstats);
     if (dbstats == nullptr) {
       fprintf(stderr,
               "No Statistics registered matching string: %s status=%s\n",
@@ -7314,34 +8107,55 @@ int db_bench_tool(int argc, char** argv) {
   FLAGS_compression_type_e =
       StringToCompressionType(FLAGS_compression_type.c_str());

+  FLAGS_wal_compression_e =
+      StringToCompressionType(FLAGS_wal_compression.c_str());
+
+  FLAGS_compressed_secondary_cache_compression_type_e = StringToCompressionType(
+      FLAGS_compressed_secondary_cache_compression_type.c_str());
+
 #ifndef ROCKSDB_LITE
+  // Stacked BlobDB
   FLAGS_blob_db_compression_type_e =
       StringToCompressionType(FLAGS_blob_db_compression_type.c_str());

-  int env_opts =
-      !FLAGS_hdfs.empty() + !FLAGS_env_uri.empty() + !FLAGS_fs_uri.empty();
+  int env_opts = !FLAGS_env_uri.empty() + !FLAGS_fs_uri.empty();
   if (env_opts > 1) {
-    fprintf(stderr,
-            "Error: --hdfs, --env_uri and --fs_uri are mutually exclusive\n");
+    fprintf(stderr, "Error: --env_uri and --fs_uri are mutually exclusive\n");
     exit(1);
   }

-  if (
-    Status s = Env::
-
-
-
-    }
-  } else if (!FLAGS_fs_uri.empty()) {
-    std::shared_ptr<FileSystem> fs;
-    Status s = FileSystem::Load(FLAGS_fs_uri, &fs);
-    if (fs == nullptr) {
-      fprintf(stderr, "Error: %s\n", s.ToString().c_str());
+  if (env_opts == 1) {
+    Status s = Env::CreateFromUri(config_options, FLAGS_env_uri, FLAGS_fs_uri,
+                                  &FLAGS_env, &env_guard);
+    if (!s.ok()) {
+      fprintf(stderr, "Failed creating env: %s\n", s.ToString().c_str());
       exit(1);
     }
-
+  } else if (FLAGS_simulate_hdd || FLAGS_simulate_hybrid_fs_file != "") {
+    //**TODO: Make the simulate fs something that can be loaded
+    // from the ObjectRegistry...
+    static std::shared_ptr<ROCKSDB_NAMESPACE::Env> composite_env =
+        NewCompositeEnv(std::make_shared<SimulatedHybridFileSystem>(
+            FileSystem::Default(), FLAGS_simulate_hybrid_fs_file,
+            /*throughput_multiplier=*/
+            int{FLAGS_simulate_hybrid_hdd_multipliers},
+            /*is_full_fs_warm=*/FLAGS_simulate_hdd));
+    FLAGS_env = composite_env.get();
   }
+
+  // Let -readonly imply -use_existing_db
+  FLAGS_use_existing_db |= FLAGS_readonly;
 #endif  // ROCKSDB_LITE
+
+  if (!FLAGS_seed) {
+    uint64_t now = FLAGS_env->GetSystemClock()->NowMicros();
+    seed_base = static_cast<int64_t>(now);
+    fprintf(stdout, "Set seed to %" PRIu64 " because --seed was 0\n",
+            seed_base);
+  } else {
+    seed_base = FLAGS_seed;
+  }
+
   if (FLAGS_use_existing_keys && !FLAGS_use_existing_db) {
     fprintf(stderr,
             "`-use_existing_db` must be true for `-use_existing_keys` to be "
@@ -7349,10 +8163,6 @@ int db_bench_tool(int argc, char** argv) {
     exit(1);
   }

-  if (!FLAGS_hdfs.empty()) {
-    FLAGS_env = new ROCKSDB_NAMESPACE::HdfsEnv(FLAGS_hdfs);
-  }
-
   if (!strcasecmp(FLAGS_compaction_fadvice.c_str(), "NONE"))
     FLAGS_compaction_fadvice_e = ROCKSDB_NAMESPACE::Options::NONE;
   else if (!strcasecmp(FLAGS_compaction_fadvice.c_str(), "NORMAL"))
@@ -7364,13 +8174,12 @@ int db_bench_tool(int argc, char** argv) {
   else {
     fprintf(stdout, "Unknown compaction fadvice:%s\n",
             FLAGS_compaction_fadvice.c_str());
+    exit(1);
   }

   FLAGS_value_size_distribution_type_e =
       StringToDistributionType(FLAGS_value_size_distribution_type.c_str());

-  FLAGS_rep_factory = StringToRepFactory(FLAGS_memtablerep.c_str());
-
   // Note options sanitization may increase thread pool sizes according to
   // max_background_flushes/max_background_compactions/max_background_jobs
   FLAGS_env->SetBackgroundThreads(FLAGS_num_high_pri_threads,