@nxtedition/rocksdb 8.2.0 → 8.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +3 -3
- package/deps/rocksdb/rocksdb/CMakeLists.txt +16 -52
- package/deps/rocksdb/rocksdb/Makefile +10 -5
- package/deps/rocksdb/rocksdb/TARGETS +8 -345
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +92 -0
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +32 -32
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +12 -9
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +6 -43
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +3 -13
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +8 -5
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +21 -47
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +3 -8
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +2 -1
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +1 -2
- package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +44 -7
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +13 -14
- package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +2 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +17 -8
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +40 -21
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +41 -42
- package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +5 -4
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +2 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +5 -3
- package/deps/rocksdb/rocksdb/db/builder.cc +7 -6
- package/deps/rocksdb/rocksdb/db/builder.h +2 -2
- package/deps/rocksdb/rocksdb/db/c.cc +76 -5
- package/deps/rocksdb/rocksdb/db/c_test.c +141 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +32 -0
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +3 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +5 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +12 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +21 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +8 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +3 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +77 -50
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +4 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +55 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +142 -56
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +1 -2
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +21 -20
- package/deps/rocksdb/rocksdb/db/convenience.cc +8 -6
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +5 -4
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +6 -3
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +260 -220
- package/deps/rocksdb/rocksdb/db/db_clip_test.cc +142 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +333 -27
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +7 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +189 -27
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +23 -10
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +134 -90
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +2 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +5 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +124 -16
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +10 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +7 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +15 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +11 -5
- package/deps/rocksdb/rocksdb/db/db_iter.cc +7 -8
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +54 -3
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +42 -0
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +116 -1
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +3 -2
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +3 -2
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +9 -8
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +142 -63
- package/deps/rocksdb/rocksdb/db/db_test.cc +28 -7
- package/deps/rocksdb/rocksdb/db/db_test2.cc +71 -131
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +18 -0
- package/deps/rocksdb/rocksdb/db/db_test_util.h +6 -0
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +10 -10
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +25 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +88 -0
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +67 -0
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +5 -0
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/experimental.cc +4 -2
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +86 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +15 -2
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +1 -2
- package/deps/rocksdb/rocksdb/db/flush_job.cc +21 -14
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +14 -7
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +31 -8
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +21 -19
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +42 -12
- package/deps/rocksdb/rocksdb/db/internal_stats.h +1 -0
- package/deps/rocksdb/rocksdb/db/kv_checksum.h +92 -6
- package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/log_format.h +8 -4
- package/deps/rocksdb/rocksdb/db/log_reader.cc +129 -51
- package/deps/rocksdb/rocksdb/db/log_reader.h +16 -0
- package/deps/rocksdb/rocksdb/db/log_test.cc +125 -4
- package/deps/rocksdb/rocksdb/db/log_writer.cc +32 -2
- package/deps/rocksdb/rocksdb/db/log_writer.h +16 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +17 -46
- package/deps/rocksdb/rocksdb/db/memtable.h +1 -1
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +8 -4
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -1
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +5 -4
- package/deps/rocksdb/rocksdb/db/repair.cc +38 -11
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +3 -3
- package/deps/rocksdb/rocksdb/db/table_cache.cc +68 -51
- package/deps/rocksdb/rocksdb/db/table_cache.h +20 -10
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -1
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +6 -3
- package/deps/rocksdb/rocksdb/db/version_builder.cc +9 -5
- package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +140 -120
- package/deps/rocksdb/rocksdb/db/version_edit.cc +14 -0
- package/deps/rocksdb/rocksdb/db/version_edit.h +12 -4
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +21 -13
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +26 -16
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +9 -9
- package/deps/rocksdb/rocksdb/db/version_set.cc +292 -96
- package/deps/rocksdb/rocksdb/db/version_set.h +53 -28
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -0
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +62 -22
- package/deps/rocksdb/rocksdb/db/version_util.h +5 -4
- package/deps/rocksdb/rocksdb/db/write_batch.cc +3 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +119 -27
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +123 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +7 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +34 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +13 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +43 -33
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +29 -17
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +6 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +85 -50
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +96 -54
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.cc +122 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +206 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +9 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +9 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +322 -92
- package/deps/rocksdb/rocksdb/env/env_posix.cc +12 -8
- package/deps/rocksdb/rocksdb/env/env_test.cc +31 -0
- package/deps/rocksdb/rocksdb/env/mock_env.cc +1 -1
- package/deps/rocksdb/rocksdb/env/unique_id_gen.h +14 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -1
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +5 -1
- package/deps/rocksdb/rocksdb/file/file_util.cc +3 -3
- package/deps/rocksdb/rocksdb/file/file_util.h +2 -0
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +89 -0
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +22 -7
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -2
- package/deps/rocksdb/rocksdb/file/readahead_raf.cc +1 -1
- package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +154 -74
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +27 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +107 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +19 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +7 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +137 -152
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +61 -26
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +30 -26
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +33 -16
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +87 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +0 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +9 -2
- package/deps/rocksdb/rocksdb/logging/env_logger.h +2 -0
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +78 -42
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +14 -9
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +1 -0
- package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -0
- package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +4 -9
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +19 -11
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +1 -1
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +211 -555
- package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +1 -1
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +36 -2
- package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +17 -7
- package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.h +10 -7
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +19 -18
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util.h +10 -2
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +14 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +35 -2
- package/deps/rocksdb/rocksdb/options/cf_options.h +5 -0
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +1 -1
- package/deps/rocksdb/rocksdb/options/options.cc +12 -53
- package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -0
- package/deps/rocksdb/rocksdb/options/options_parser.cc +11 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +32 -4
- package/deps/rocksdb/rocksdb/options/options_test.cc +89 -5
- package/deps/rocksdb/rocksdb/port/lang.h +27 -0
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +67 -24
- package/deps/rocksdb/rocksdb/src.mk +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -3
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +195 -35
- package/deps/rocksdb/rocksdb/table/block_based/block.h +197 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +71 -51
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +7 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +4 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +3 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +43 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +36 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +266 -166
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +44 -14
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +63 -56
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +8 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +10 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +14 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +918 -2
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -2
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +10 -9
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -8
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +2 -2
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +18 -23
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +8 -8
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +16 -32
- package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +7 -8
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +4 -5
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +3 -3
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +46 -53
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +12 -12
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +7 -9
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +26 -23
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +3 -0
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +3 -2
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +7 -1
- package/deps/rocksdb/rocksdb/table/block_fetcher.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +3 -2
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +5 -2
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +4 -2
- package/deps/rocksdb/rocksdb/table/format.cc +4 -4
- package/deps/rocksdb/rocksdb/table/format.h +1 -1
- package/deps/rocksdb/rocksdb/table/get_context.cc +1 -1
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +33 -22
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +4 -0
- package/deps/rocksdb/rocksdb/table/mock_table.cc +4 -2
- package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +1 -1
- package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +1 -1
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +18 -10
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +4 -3
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +10 -7
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +4 -2
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +11 -0
- package/deps/rocksdb/rocksdb/table/table_builder.h +14 -5
- package/deps/rocksdb/rocksdb/table/table_properties.cc +2 -0
- package/deps/rocksdb/rocksdb/table/table_reader.h +6 -3
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +1 -1
- package/deps/rocksdb/rocksdb/table/table_test.cc +291 -34
- package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +3 -1
- package/deps/rocksdb/rocksdb/test_util/testharness.h +5 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -2
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +33 -17
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +3 -1
- package/deps/rocksdb/rocksdb/util/bloom_impl.h +2 -2
- package/deps/rocksdb/rocksdb/util/compression.h +1 -1
- package/deps/rocksdb/rocksdb/util/crc32c.cc +24 -83
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +7 -9
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +4 -1
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +1 -1
- package/deps/rocksdb/rocksdb/util/gflags_compat.h +9 -10
- package/deps/rocksdb/rocksdb/util/math.h +12 -7
- package/deps/rocksdb/rocksdb/util/rate_limiter.cc +16 -18
- package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +46 -2
- package/deps/rocksdb/rocksdb/util/ribbon_test.cc +6 -6
- package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +12 -7
- package/deps/rocksdb/rocksdb/util/stop_watch.h +31 -13
- package/deps/rocksdb/rocksdb/util/thread_list_test.cc +2 -0
- package/deps/rocksdb/rocksdb/util/thread_operation.h +2 -1
- package/deps/rocksdb/rocksdb/util/udt_util.h +77 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +11 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +34 -1
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +15 -0
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +5 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +29 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +6 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +10 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +6 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +5 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +5 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
- /package/deps/rocksdb/rocksdb/memory/{memory_allocator.h → memory_allocator_impl.h} +0 -0
- /package/deps/rocksdb/rocksdb/monitoring/{statistics.h → statistics_impl.h} +0 -0
- /package/deps/rocksdb/rocksdb/table/block_based/{flush_block_policy.h → flush_block_policy_impl.h} +0 -0
- /package/deps/rocksdb/rocksdb/util/{rate_limiter.h → rate_limiter_impl.h} +0 -0
- /package/deps/rocksdb/rocksdb/utilities/agg_merge/{agg_merge.h → agg_merge_impl.h} +0 -0
|
@@ -55,6 +55,11 @@ struct JemallocAllocatorOptions {
|
|
|
55
55
|
// Upper bound of allocation size to use tcache, if limit_tcache_size=true.
|
|
56
56
|
// When used with block cache, it is recommended to set it to block_size.
|
|
57
57
|
size_t tcache_size_upper_bound = 16 * 1024;
|
|
58
|
+
|
|
59
|
+
// Number of arenas across which we spread allocation requests. Increasing
|
|
60
|
+
// this setting can mitigate arena mutex contention. The value must be
|
|
61
|
+
// positive.
|
|
62
|
+
size_t num_arenas = 1;
|
|
58
63
|
};
|
|
59
64
|
|
|
60
65
|
// Generate memory allocator which allocates through Jemalloc and utilize
|
|
@@ -70,7 +75,8 @@ struct JemallocAllocatorOptions {
|
|
|
70
75
|
// core dump. Side benefit of using single arena would be reduction of jemalloc
|
|
71
76
|
// metadata for some workloads.
|
|
72
77
|
//
|
|
73
|
-
// To mitigate mutex contention for using one single arena
|
|
78
|
+
// To mitigate mutex contention for using one single arena (see also
|
|
79
|
+
// `JemallocAllocatorOptions::num_arenas` above), jemalloc tcache
|
|
74
80
|
// (thread-local cache) is enabled to cache unused allocations for future use.
|
|
75
81
|
// The tcache normally incurs 0.5M extra memory usage per-thread. The usage
|
|
76
82
|
// can be reduced by limiting allocation sizes to cache.
|
|
@@ -1157,7 +1157,7 @@ struct DBOptions {
|
|
|
1157
1157
|
|
|
1158
1158
|
// A global cache for table-level rows.
|
|
1159
1159
|
// Default: nullptr (disabled)
|
|
1160
|
-
std::shared_ptr<
|
|
1160
|
+
std::shared_ptr<GeneralCache> row_cache = nullptr;
|
|
1161
1161
|
|
|
1162
1162
|
// A filter object supplied to be invoked while processing write-ahead-logs
|
|
1163
1163
|
// (WALs) during recovery. The filter provides a way to inspect log
|
|
@@ -1457,12 +1457,119 @@ enum ReadTier {
|
|
|
1457
1457
|
|
|
1458
1458
|
// Options that control read operations
|
|
1459
1459
|
struct ReadOptions {
|
|
1460
|
+
// *** BEGIN options relevant to point lookups as well as scans ***
|
|
1461
|
+
|
|
1460
1462
|
// If "snapshot" is non-nullptr, read as of the supplied snapshot
|
|
1461
1463
|
// (which must belong to the DB that is being read and which must
|
|
1462
1464
|
// not have been released). If "snapshot" is nullptr, use an implicit
|
|
1463
1465
|
// snapshot of the state at the beginning of this read operation.
|
|
1464
|
-
|
|
1465
|
-
|
|
1466
|
+
const Snapshot* snapshot = nullptr;
|
|
1467
|
+
|
|
1468
|
+
// Timestamp of operation. Read should return the latest data visible to the
|
|
1469
|
+
// specified timestamp. All timestamps of the same database must be of the
|
|
1470
|
+
// same length and format. The user is responsible for providing a customized
|
|
1471
|
+
// compare function via Comparator to order <key, timestamp> tuples.
|
|
1472
|
+
// For iterator, iter_start_ts is the lower bound (older) and timestamp
|
|
1473
|
+
// serves as the upper bound. Versions of the same record that fall in
|
|
1474
|
+
// the timestamp range will be returned. If iter_start_ts is nullptr,
|
|
1475
|
+
// only the most recent version visible to timestamp is returned.
|
|
1476
|
+
// The user-specified timestamp feature is still under active development,
|
|
1477
|
+
// and the API is subject to change.
|
|
1478
|
+
const Slice* timestamp = nullptr;
|
|
1479
|
+
const Slice* iter_start_ts = nullptr;
|
|
1480
|
+
|
|
1481
|
+
// Deadline for completing an API call (Get/MultiGet/Seek/Next for now)
|
|
1482
|
+
// in microseconds.
|
|
1483
|
+
// It should be set to microseconds since epoch, i.e., gettimeofday or
|
|
1484
|
+
// equivalent plus allowed duration in microseconds. The best way is to use
|
|
1485
|
+
// env->NowMicros() + some timeout.
|
|
1486
|
+
// This is best efforts. The call may exceed the deadline if there is IO
|
|
1487
|
+
// involved and the file system doesn't support deadlines, or due to
|
|
1488
|
+
// checking for deadline periodically rather than for every key if
|
|
1489
|
+
// processing a batch
|
|
1490
|
+
std::chrono::microseconds deadline = std::chrono::microseconds::zero();
|
|
1491
|
+
|
|
1492
|
+
// A timeout in microseconds to be passed to the underlying FileSystem for
|
|
1493
|
+
// reads. As opposed to deadline, this determines the timeout for each
|
|
1494
|
+
// individual file read request. If a MultiGet/Get/Seek/Next etc call
|
|
1495
|
+
// results in multiple reads, each read can last up to io_timeout us.
|
|
1496
|
+
std::chrono::microseconds io_timeout = std::chrono::microseconds::zero();
|
|
1497
|
+
|
|
1498
|
+
// Specify if this read request should process data that ALREADY
|
|
1499
|
+
// resides on a particular cache. If the required data is not
|
|
1500
|
+
// found at the specified cache, then Status::Incomplete is returned.
|
|
1501
|
+
ReadTier read_tier = kReadAllTier;
|
|
1502
|
+
|
|
1503
|
+
// For file reads associated with this option, charge the internal rate
|
|
1504
|
+
// limiter (see `DBOptions::rate_limiter`) at the specified priority. The
|
|
1505
|
+
// special value `Env::IO_TOTAL` disables charging the rate limiter.
|
|
1506
|
+
//
|
|
1507
|
+
// The rate limiting is bypassed no matter this option's value for file reads
|
|
1508
|
+
// on plain tables (these can exist when `ColumnFamilyOptions::table_factory`
|
|
1509
|
+
// is a `PlainTableFactory`) and cuckoo tables (these can exist when
|
|
1510
|
+
// `ColumnFamilyOptions::table_factory` is a `CuckooTableFactory`).
|
|
1511
|
+
//
|
|
1512
|
+
// The bytes charged to rate limiter may not exactly match the file read bytes
|
|
1513
|
+
// since there are some seemingly insignificant reads, like for file
|
|
1514
|
+
// headers/footers, that we currently do not charge to rate limiter.
|
|
1515
|
+
Env::IOPriority rate_limiter_priority = Env::IO_TOTAL;
|
|
1516
|
+
|
|
1517
|
+
// It limits the maximum cumulative value size of the keys in batch while
|
|
1518
|
+
// reading through MultiGet. Once the cumulative value size exceeds this
|
|
1519
|
+
// soft limit then all the remaining keys are returned with status Aborted.
|
|
1520
|
+
uint64_t value_size_soft_limit = std::numeric_limits<uint64_t>::max();
|
|
1521
|
+
|
|
1522
|
+
// If true, all data read from underlying storage will be
|
|
1523
|
+
// verified against corresponding checksums.
|
|
1524
|
+
bool verify_checksums = true;
|
|
1525
|
+
|
|
1526
|
+
// Should the "data block"/"index block" read for this iteration be placed in
|
|
1527
|
+
// block cache?
|
|
1528
|
+
// Callers may wish to set this field to false for bulk scans.
|
|
1529
|
+
// This would help not to change the eviction order of existing items in the
|
|
1530
|
+
// block cache.
|
|
1531
|
+
bool fill_cache = true;
|
|
1532
|
+
|
|
1533
|
+
// If true, range tombstones handling will be skipped in key lookup paths.
|
|
1534
|
+
// For DB instances that don't use DeleteRange() calls, this setting can
|
|
1535
|
+
// be used to optimize the read performance.
|
|
1536
|
+
// Note that, if this assumption (of no previous DeleteRange() calls) is
|
|
1537
|
+
// broken, stale keys could be served in read paths.
|
|
1538
|
+
bool ignore_range_deletions = false;
|
|
1539
|
+
|
|
1540
|
+
// Experimental
|
|
1541
|
+
//
|
|
1542
|
+
// If async_io is enabled, RocksDB will prefetch some of the data asynchronously.
|
|
1543
|
+
// RocksDB applies it if reads are sequential and its internal automatic
|
|
1544
|
+
// prefetching.
|
|
1545
|
+
bool async_io = false;
|
|
1546
|
+
|
|
1547
|
+
// Experimental
|
|
1548
|
+
//
|
|
1549
|
+
// If async_io is set, then this flag controls whether we read SST files
|
|
1550
|
+
// in multiple levels asynchronously. Enabling this flag can help reduce
|
|
1551
|
+
// MultiGet latency by maximizing the number of SST files read in
|
|
1552
|
+
// parallel if the keys in the MultiGet batch are in different levels. It
|
|
1553
|
+
// comes at the expense of slightly higher CPU overhead.
|
|
1554
|
+
bool optimize_multiget_for_io = true;
|
|
1555
|
+
|
|
1556
|
+
// *** END options relevant to point lookups (as well as scans) ***
|
|
1557
|
+
// *** BEGIN options only relevant to iterators or scans ***
|
|
1558
|
+
|
|
1559
|
+
// RocksDB does auto-readahead for iterators on noticing more than two reads
|
|
1560
|
+
// for a table file. The readahead starts at 8KB and doubles on every
|
|
1561
|
+
// additional read up to 256KB.
|
|
1562
|
+
// This option can help if most of the range scans are large, and if it is
|
|
1563
|
+
// determined that a larger readahead than that enabled by auto-readahead is
|
|
1564
|
+
// needed.
|
|
1565
|
+
// Using a large readahead size (> 2MB) can typically improve the performance
|
|
1566
|
+
// of forward iteration on spinning disks.
|
|
1567
|
+
size_t readahead_size = 0;
|
|
1568
|
+
|
|
1569
|
+
// A threshold for the number of keys that can be skipped before failing an
|
|
1570
|
+
// iterator seek as incomplete. The default value of 0 should be used to
|
|
1571
|
+
// never fail a request as incomplete, even on skipping too many keys.
|
|
1572
|
+
uint64_t max_skippable_internal_keys = 0;
|
|
1466
1573
|
|
|
1467
1574
|
// `iterate_lower_bound` defines the smallest key at which the backward
|
|
1468
1575
|
// iterator can return an entry. Once the bound is passed, Valid() will be
|
|
@@ -1475,8 +1582,7 @@ struct ReadOptions {
|
|
|
1475
1582
|
//
|
|
1476
1583
|
// In case of user_defined timestamp, if enabled, iterate_lower_bound should
|
|
1477
1584
|
// point to key without timestamp part.
|
|
1478
|
-
|
|
1479
|
-
const Slice* iterate_lower_bound;
|
|
1585
|
+
const Slice* iterate_lower_bound = nullptr;
|
|
1480
1586
|
|
|
1481
1587
|
// "iterate_upper_bound" defines the extent up to which the forward iterator
|
|
1482
1588
|
// can return entries. Once the bound is reached, Valid() will be false.
|
|
@@ -1496,63 +1602,24 @@ struct ReadOptions {
|
|
|
1496
1602
|
//
|
|
1497
1603
|
// In case of user_defined timestamp, if enabled, iterate_upper_bound should
|
|
1498
1604
|
// point to key without timestamp part.
|
|
1499
|
-
|
|
1500
|
-
const Slice* iterate_upper_bound;
|
|
1501
|
-
|
|
1502
|
-
// RocksDB does auto-readahead for iterators on noticing more than two reads
|
|
1503
|
-
// for a table file. The readahead starts at 8KB and doubles on every
|
|
1504
|
-
// additional read up to 256KB.
|
|
1505
|
-
// This option can help if most of the range scans are large, and if it is
|
|
1506
|
-
// determined that a larger readahead than that enabled by auto-readahead is
|
|
1507
|
-
// needed.
|
|
1508
|
-
// Using a large readahead size (> 2MB) can typically improve the performance
|
|
1509
|
-
// of forward iteration on spinning disks.
|
|
1510
|
-
// Default: 0
|
|
1511
|
-
size_t readahead_size;
|
|
1512
|
-
|
|
1513
|
-
// A threshold for the number of keys that can be skipped before failing an
|
|
1514
|
-
// iterator seek as incomplete. The default value of 0 should be used to
|
|
1515
|
-
// never fail a request as incomplete, even on skipping too many keys.
|
|
1516
|
-
// Default: 0
|
|
1517
|
-
uint64_t max_skippable_internal_keys;
|
|
1518
|
-
|
|
1519
|
-
// Specify if this read request should process data that ALREADY
|
|
1520
|
-
// resides on a particular cache. If the required data is not
|
|
1521
|
-
// found at the specified cache, then Status::Incomplete is returned.
|
|
1522
|
-
// Default: kReadAllTier
|
|
1523
|
-
ReadTier read_tier;
|
|
1524
|
-
|
|
1525
|
-
// If true, all data read from underlying storage will be
|
|
1526
|
-
// verified against corresponding checksums.
|
|
1527
|
-
// Default: true
|
|
1528
|
-
bool verify_checksums;
|
|
1529
|
-
|
|
1530
|
-
// Should the "data block"/"index block" read for this iteration be placed in
|
|
1531
|
-
// block cache?
|
|
1532
|
-
// Callers may wish to set this field to false for bulk scans.
|
|
1533
|
-
// This would help not to the change eviction order of existing items in the
|
|
1534
|
-
// block cache.
|
|
1535
|
-
// Default: true
|
|
1536
|
-
bool fill_cache;
|
|
1605
|
+
const Slice* iterate_upper_bound = nullptr;
|
|
1537
1606
|
|
|
1538
1607
|
// Specify to create a tailing iterator -- a special iterator that has a
|
|
1539
1608
|
// view of the complete database (i.e. it can also be used to read newly
|
|
1540
1609
|
// added data) and is optimized for sequential reads. It will return records
|
|
1541
1610
|
// that were inserted into the database after the creation of the iterator.
|
|
1542
|
-
|
|
1543
|
-
bool tailing;
|
|
1611
|
+
bool tailing = false;
|
|
1544
1612
|
|
|
1545
1613
|
// This option is not used anymore. It was to turn on a functionality that
|
|
1546
|
-
// has been removed.
|
|
1547
|
-
bool managed;
|
|
1614
|
+
// has been removed. DEPRECATED
|
|
1615
|
+
bool managed = false;
|
|
1548
1616
|
|
|
1549
1617
|
// Enable a total order seek regardless of index format (e.g. hash index)
|
|
1550
1618
|
// used in the table. Some table format (e.g. plain table) may not support
|
|
1551
1619
|
// this option.
|
|
1552
1620
|
// If true when calling Get(), we also skip prefix bloom when reading from
|
|
1553
1621
|
// block based table, which only affects Get() performance.
|
|
1554
|
-
|
|
1555
|
-
bool total_order_seek;
|
|
1622
|
+
bool total_order_seek = false;
|
|
1556
1623
|
|
|
1557
1624
|
// When true, by default use total_order_seek = true, and RocksDB can
|
|
1558
1625
|
// selectively enable prefix seek mode if it won't generate a different result
|
|
@@ -1568,38 +1635,37 @@ struct ReadOptions {
|
|
|
1568
1635
|
// iterators. (We are also assuming the new condition on
|
|
1569
1636
|
// IsSameLengthImmediateSuccessor is satisfied; see its BUG section).
|
|
1570
1637
|
// A bug example is in DBTest2::AutoPrefixMode1, search for "BUG".
|
|
1571
|
-
|
|
1572
|
-
bool auto_prefix_mode;
|
|
1638
|
+
bool auto_prefix_mode = false;
|
|
1573
1639
|
|
|
1574
1640
|
// Enforce that the iterator only iterates over the same prefix as the seek.
|
|
1575
1641
|
// This option is effective only for prefix seeks, i.e. prefix_extractor is
|
|
1576
1642
|
// non-null for the column family and total_order_seek is false. Unlike
|
|
1577
1643
|
// iterate_upper_bound, prefix_same_as_start only works within a prefix
|
|
1578
1644
|
// but in both directions.
|
|
1579
|
-
|
|
1580
|
-
bool prefix_same_as_start;
|
|
1645
|
+
bool prefix_same_as_start = false;
|
|
1581
1646
|
|
|
1582
1647
|
// Keep the blocks loaded by the iterator pinned in memory as long as the
|
|
1583
1648
|
// iterator is not deleted. If used when reading from tables created with
|
|
1584
1649
|
// BlockBasedTableOptions::use_delta_encoding = false,
|
|
1585
1650
|
// Iterator's property "rocksdb.iterator.is-key-pinned" is guaranteed to
|
|
1586
1651
|
// return 1.
|
|
1587
|
-
|
|
1588
|
-
|
|
1652
|
+
bool pin_data = false;
|
|
1653
|
+
|
|
1654
|
+
// For iterators, RocksDB does auto-readahead on noticing more than two
|
|
1655
|
+
// sequential reads for a table file if user doesn't provide readahead_size.
|
|
1656
|
+
// The readahead starts at 8KB and doubles on every additional read up to
|
|
1657
|
+
// max_auto_readahead_size only when reads are sequential. However at each
|
|
1658
|
+
// level, if iterator moves over next file, readahead_size starts again from
|
|
1659
|
+
// 8KB.
|
|
1660
|
+
//
|
|
1661
|
+
// By enabling this option, RocksDB will do some enhancements for
|
|
1662
|
+
// prefetching the data.
|
|
1663
|
+
bool adaptive_readahead = false;
|
|
1589
1664
|
|
|
1590
1665
|
// If true, when PurgeObsoleteFile is called in CleanupIteratorState, we
|
|
1591
1666
|
// schedule a background job in the flush job queue and delete obsolete files
|
|
1592
1667
|
// in background.
|
|
1593
|
-
|
|
1594
|
-
bool background_purge_on_iterator_cleanup;
|
|
1595
|
-
|
|
1596
|
-
// If true, range tombstones handling will be skipped in key lookup paths.
|
|
1597
|
-
// For DB instances that don't use DeleteRange() calls, this setting can
|
|
1598
|
-
// be used to optimize the read performance.
|
|
1599
|
-
// Note that, if this assumption (of no previous DeleteRange() calls) is
|
|
1600
|
-
// broken, stale keys could be served in read paths.
|
|
1601
|
-
// Default: false
|
|
1602
|
-
bool ignore_range_deletions;
|
|
1668
|
+
bool background_purge_on_iterator_cleanup = false;
|
|
1603
1669
|
|
|
1604
1670
|
// A callback to determine whether relevant keys for this scan exist in a
|
|
1605
1671
|
// given table based on the table's properties. The callback is passed the
|
|
@@ -1609,95 +1675,14 @@ struct ReadOptions {
|
|
|
1609
1675
|
// Default: empty (every table will be scanned)
|
|
1610
1676
|
std::function<bool(const TableProperties&)> table_filter;
|
|
1611
1677
|
|
|
1612
|
-
//
|
|
1613
|
-
// specified timestamp. All timestamps of the same database must be of the
|
|
1614
|
-
// same length and format. The user is responsible for providing a customized
|
|
1615
|
-
// compare function via Comparator to order <key, timestamp> tuples.
|
|
1616
|
-
// For iterator, iter_start_ts is the lower bound (older) and timestamp
|
|
1617
|
-
// serves as the upper bound. Versions of the same record that fall in
|
|
1618
|
-
// the timestamp range will be returned. If iter_start_ts is nullptr,
|
|
1619
|
-
// only the most recent version visible to timestamp is returned.
|
|
1620
|
-
// The user-specified timestamp feature is still under active development,
|
|
1621
|
-
// and the API is subject to change.
|
|
1622
|
-
// Default: nullptr
|
|
1623
|
-
const Slice* timestamp;
|
|
1624
|
-
const Slice* iter_start_ts;
|
|
1678
|
+
// *** END options only relevant to iterators or scans ***
|
|
1625
1679
|
|
|
1626
|
-
//
|
|
1627
|
-
|
|
1628
|
-
// It should be set to microseconds since epoch, i.e, gettimeofday or
|
|
1629
|
-
// equivalent plus allowed duration in microseconds. The best way is to use
|
|
1630
|
-
// env->NowMicros() + some timeout.
|
|
1631
|
-
// This is best efforts. The call may exceed the deadline if there is IO
|
|
1632
|
-
// involved and the file system doesn't support deadlines, or due to
|
|
1633
|
-
// checking for deadline periodically rather than for every key if
|
|
1634
|
-
// processing a batch
|
|
1635
|
-
std::chrono::microseconds deadline;
|
|
1636
|
-
|
|
1637
|
-
// A timeout in microseconds to be passed to the underlying FileSystem for
|
|
1638
|
-
// reads. As opposed to deadline, this determines the timeout for each
|
|
1639
|
-
// individual file read request. If a MultiGet/Get/Seek/Next etc call
|
|
1640
|
-
// results in multiple reads, each read can last up to io_timeout us.
|
|
1641
|
-
std::chrono::microseconds io_timeout;
|
|
1642
|
-
|
|
1643
|
-
// It limits the maximum cumulative value size of the keys in batch while
|
|
1644
|
-
// reading through MultiGet. Once the cumulative value size exceeds this
|
|
1645
|
-
// soft limit then all the remaining keys are returned with status Aborted.
|
|
1646
|
-
//
|
|
1647
|
-
// Default: std::numeric_limits<uint64_t>::max()
|
|
1648
|
-
uint64_t value_size_soft_limit;
|
|
1649
|
-
|
|
1650
|
-
// For iterators, RocksDB does auto-readahead on noticing more than two
|
|
1651
|
-
// sequential reads for a table file if user doesn't provide readahead_size.
|
|
1652
|
-
// The readahead starts at 8KB and doubles on every additional read upto
|
|
1653
|
-
// max_auto_readahead_size only when reads are sequential. However at each
|
|
1654
|
-
// level, if iterator moves over next file, readahead_size starts again from
|
|
1655
|
-
// 8KB.
|
|
1656
|
-
//
|
|
1657
|
-
// By enabling this option, RocksDB will do some enhancements for
|
|
1658
|
-
// prefetching the data.
|
|
1659
|
-
//
|
|
1660
|
-
// Default: false
|
|
1661
|
-
bool adaptive_readahead;
|
|
1662
|
-
|
|
1663
|
-
// For file reads associated with this option, charge the internal rate
|
|
1664
|
-
// limiter (see `DBOptions::rate_limiter`) at the specified priority. The
|
|
1665
|
-
// special value `Env::IO_TOTAL` disables charging the rate limiter.
|
|
1666
|
-
//
|
|
1667
|
-
// The rate limiting is bypassed no matter this option's value for file reads
|
|
1668
|
-
// on plain tables (these can exist when `ColumnFamilyOptions::table_factory`
|
|
1669
|
-
// is a `PlainTableFactory`) and cuckoo tables (these can exist when
|
|
1670
|
-
// `ColumnFamilyOptions::table_factory` is a `CuckooTableFactory`).
|
|
1671
|
-
//
|
|
1672
|
-
// The bytes charged to rate limiter may not exactly match the file read bytes
|
|
1673
|
-
// since there are some seemingly insignificant reads, like for file
|
|
1674
|
-
// headers/footers, that we currently do not charge to rate limiter.
|
|
1675
|
-
//
|
|
1676
|
-
// Default: `Env::IO_TOTAL`.
|
|
1677
|
-
Env::IOPriority rate_limiter_priority = Env::IO_TOTAL;
|
|
1678
|
-
|
|
1679
|
-
// Experimental
|
|
1680
|
-
//
|
|
1681
|
-
// If async_io is enabled, RocksDB will prefetch some of data asynchronously.
|
|
1682
|
-
// RocksDB apply it if reads are sequential and its internal automatic
|
|
1683
|
-
// prefetching.
|
|
1684
|
-
//
|
|
1685
|
-
// Default: false
|
|
1686
|
-
bool async_io;
|
|
1687
|
-
|
|
1688
|
-
// Experimental
|
|
1689
|
-
//
|
|
1690
|
-
// If async_io is set, then this flag controls whether we read SST files
|
|
1691
|
-
// in multiple levels asynchronously. Enabling this flag can help reduce
|
|
1692
|
-
// MultiGet latency by maximizing the number of SST files read in
|
|
1693
|
-
// parallel if the keys in the MultiGet batch are in different levels. It
|
|
1694
|
-
// comes at the expense of slightly higher CPU overhead.
|
|
1695
|
-
//
|
|
1696
|
-
// Default: true
|
|
1697
|
-
bool optimize_multiget_for_io;
|
|
1680
|
+
// ** For RocksDB internal use only **
|
|
1681
|
+
Env::IOActivity io_activity = Env::IOActivity::kUnknown;
|
|
1698
1682
|
|
|
1699
|
-
ReadOptions()
|
|
1700
|
-
ReadOptions(bool
|
|
1683
|
+
ReadOptions() {}
|
|
1684
|
+
ReadOptions(bool _verify_checksums, bool _fill_cache);
|
|
1685
|
+
explicit ReadOptions(Env::IOActivity _io_activity);
|
|
1701
1686
|
};
|
|
1702
1687
|
|
|
1703
1688
|
// Options that control write operations
|
|
@@ -14,13 +14,19 @@
|
|
|
14
14
|
|
|
15
15
|
namespace ROCKSDB_NAMESPACE {
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
17
|
+
/*
|
|
18
|
+
* NOTE:
|
|
19
|
+
* Please do not reorder the fields in this structure. If you plan to do that or
|
|
20
|
+
* add/remove fields to this structure, builds would fail. The way to fix the
|
|
21
|
+
* builds would be to add the appropriate fields to the
|
|
22
|
+
* DEF_PERF_CONTEXT_LEVEL_METRICS() macro in the perf_context.cc file.
|
|
23
|
+
*/
|
|
20
24
|
|
|
21
25
|
// Break down performance counters by level and store per-level perf context in
|
|
22
26
|
// PerfContextByLevel
|
|
23
|
-
struct
|
|
27
|
+
struct PerfContextByLevelBase {
|
|
28
|
+
// These Bloom stats apply to point reads (Get/MultiGet) for whole key and
|
|
29
|
+
// prefix filters.
|
|
24
30
|
// # of times bloom filter has avoided file reads, i.e., negatives.
|
|
25
31
|
uint64_t bloom_filter_useful = 0;
|
|
26
32
|
// # of times bloom FullFilter has not avoided the reads.
|
|
@@ -38,37 +44,34 @@ struct PerfContextByLevel {
|
|
|
38
44
|
|
|
39
45
|
uint64_t block_cache_hit_count = 0; // total number of block cache hits
|
|
40
46
|
uint64_t block_cache_miss_count = 0; // total number of block cache misses
|
|
41
|
-
|
|
42
|
-
void Reset(); // reset all performance counters to zero
|
|
43
47
|
};
|
|
44
48
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
PerfContext() {}
|
|
49
|
-
|
|
50
|
-
PerfContext(const PerfContext&);
|
|
51
|
-
PerfContext& operator=(const PerfContext&);
|
|
52
|
-
PerfContext(PerfContext&&) noexcept;
|
|
49
|
+
// A thread-local context for gathering performance counters efficiently
|
|
50
|
+
// and transparently.
|
|
51
|
+
// Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats.
|
|
53
52
|
|
|
53
|
+
// Break down performance counters by level and store per-level perf context in
|
|
54
|
+
// PerfContextByLevel
|
|
55
|
+
struct PerfContextByLevel : public PerfContextByLevelBase {
|
|
54
56
|
void Reset(); // reset all performance counters to zero
|
|
57
|
+
};
|
|
55
58
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
// free the space for PerfContextByLevel, also disable per level perf context
|
|
65
|
-
void ClearPerLevelPerfContext();
|
|
59
|
+
/*
|
|
60
|
+
* NOTE:
|
|
61
|
+
* Please do not reorder the fields in this structure. If you plan to do that or
|
|
62
|
+
* add/remove fields to this structure, builds would fail. The way to fix the
|
|
63
|
+
* builds would be to add the appropriate fields to the
|
|
64
|
+
* DEF_PERF_CONTEXT_METRICS() macro in the perf_context.cc file.
|
|
65
|
+
*/
|
|
66
66
|
|
|
67
|
+
struct PerfContextBase {
|
|
67
68
|
uint64_t user_key_comparison_count; // total number of user key comparisons
|
|
68
69
|
uint64_t block_cache_hit_count; // total number of block cache hits
|
|
69
70
|
uint64_t block_read_count; // total number of block reads (with IO)
|
|
70
71
|
uint64_t block_read_byte; // total number of bytes from block reads
|
|
71
72
|
uint64_t block_read_time; // total nanos spent on block reads
|
|
73
|
+
// total cpu time in nanos spent on block reads
|
|
74
|
+
uint64_t block_read_cpu_time;
|
|
72
75
|
uint64_t block_cache_index_hit_count; // total number of index block hits
|
|
73
76
|
// total number of standalone handles lookup from secondary cache
|
|
74
77
|
uint64_t block_cache_standalone_handle_count;
|
|
@@ -216,9 +219,9 @@ struct PerfContext {
|
|
|
216
219
|
uint64_t bloom_memtable_hit_count;
|
|
217
220
|
// total number of mem table bloom misses
|
|
218
221
|
uint64_t bloom_memtable_miss_count;
|
|
219
|
-
// total number of SST
|
|
222
|
+
// total number of SST bloom hits
|
|
220
223
|
uint64_t bloom_sst_hit_count;
|
|
221
|
-
// total number of SST
|
|
224
|
+
// total number of SST bloom misses
|
|
222
225
|
uint64_t bloom_sst_miss_count;
|
|
223
226
|
|
|
224
227
|
// Time spent waiting on key locks in transaction lock manager.
|
|
@@ -254,15 +257,47 @@ struct PerfContext {
|
|
|
254
257
|
uint64_t iter_prev_cpu_nanos;
|
|
255
258
|
uint64_t iter_seek_cpu_nanos;
|
|
256
259
|
|
|
260
|
+
// EXPERIMENTAL
|
|
261
|
+
// Total number of db iterator's Next(), Prev(), Seek-related APIs being
|
|
262
|
+
// called
|
|
263
|
+
uint64_t iter_next_count;
|
|
264
|
+
uint64_t iter_prev_count;
|
|
265
|
+
uint64_t iter_seek_count;
|
|
266
|
+
|
|
257
267
|
// Time spent in encrypting data. Populated when EncryptedEnv is used.
|
|
258
268
|
uint64_t encrypt_data_nanos;
|
|
259
269
|
// Time spent in decrypting data. Populated when EncryptedEnv is used.
|
|
260
270
|
uint64_t decrypt_data_nanos;
|
|
261
271
|
|
|
262
272
|
uint64_t number_async_seek;
|
|
273
|
+
};
|
|
274
|
+
|
|
275
|
+
struct PerfContext : public PerfContextBase {
|
|
276
|
+
~PerfContext();
|
|
277
|
+
|
|
278
|
+
PerfContext() {}
|
|
279
|
+
|
|
280
|
+
PerfContext(const PerfContext&);
|
|
281
|
+
PerfContext& operator=(const PerfContext&);
|
|
282
|
+
PerfContext(PerfContext&&) noexcept;
|
|
283
|
+
|
|
284
|
+
void Reset(); // reset all performance counters to zero
|
|
285
|
+
|
|
286
|
+
std::string ToString(bool exclude_zero_counters = false) const;
|
|
287
|
+
|
|
288
|
+
// enable per level perf context and allocate storage for PerfContextByLevel
|
|
289
|
+
void EnablePerLevelPerfContext();
|
|
290
|
+
|
|
291
|
+
// temporarily disable per level perf context by setting the flag to false
|
|
292
|
+
void DisablePerLevelPerfContext();
|
|
293
|
+
|
|
294
|
+
// free the space for PerfContextByLevel, also disable per level perf context
|
|
295
|
+
void ClearPerLevelPerfContext();
|
|
263
296
|
|
|
264
297
|
std::map<uint32_t, PerfContextByLevel>* level_to_perf_context = nullptr;
|
|
265
298
|
bool per_level_perf_context_enabled = false;
|
|
299
|
+
|
|
300
|
+
void copyMetrics(const PerfContext* other) noexcept;
|
|
266
301
|
};
|
|
267
302
|
|
|
268
303
|
// If RocksDB is compiled with -DNPERF_CONTEXT, then a pointer to a global,
|
|
@@ -17,11 +17,23 @@
|
|
|
17
17
|
|
|
18
18
|
namespace ROCKSDB_NAMESPACE {
|
|
19
19
|
|
|
20
|
-
// A handle for lookup result.
|
|
21
|
-
//
|
|
22
|
-
//
|
|
23
|
-
//
|
|
24
|
-
//
|
|
20
|
+
// A handle for lookup result. Immediately after SecondaryCache::Lookup() with
|
|
21
|
+
// wait=false (and depending on the implementation), the handle could be in any
|
|
22
|
+
// of the below states. It must not be destroyed while in the pending state.
|
|
23
|
+
// * Pending state (IsReady() == false): result is not ready. Value() and Size()
|
|
24
|
+
// must not be called.
|
|
25
|
+
// * Ready + not found state (IsReady() == true, Value() == nullptr): the lookup
|
|
26
|
+
// has completed, finding no match. Or an error occurred that prevented
|
|
27
|
+
// normal completion of the Lookup.
|
|
28
|
+
// * Ready + found state (IsReady() == true, Value() != nullptr): the lookup
|
|
29
|
+
// has completed, finding an entry that has been loaded into an object that is
|
|
30
|
+
// now owned by the caller.
|
|
31
|
+
//
|
|
32
|
+
// Wait() or SecondaryCache::WaitAll() may be skipped if IsReady() happens to
|
|
33
|
+
// return true, but (depending on the implementation) IsReady() might never
|
|
34
|
+
// return true without Wait() or SecondaryCache::WaitAll(). After the handle
|
|
35
|
+
// is known ready, calling Value() is required to avoid a memory leak in case
|
|
36
|
+
// of a cache hit.
|
|
25
37
|
class SecondaryCacheResultHandle {
|
|
26
38
|
public:
|
|
27
39
|
virtual ~SecondaryCacheResultHandle() = default;
|
|
@@ -36,7 +48,9 @@ class SecondaryCacheResultHandle {
|
|
|
36
48
|
// the lookup was unsuccessful.
|
|
37
49
|
virtual Cache::ObjectPtr Value() = 0;
|
|
38
50
|
|
|
39
|
-
// Return the
|
|
51
|
+
// Return the out_charge from the helper->create_cb used to construct the
|
|
52
|
+
// object.
|
|
53
|
+
// WART: potentially confusing name
|
|
40
54
|
virtual size_t Size() = 0;
|
|
41
55
|
};
|
|
42
56
|
|
|
@@ -57,24 +71,13 @@ class SecondaryCache : public Customizable {
|
|
|
57
71
|
const std::string& id,
|
|
58
72
|
std::shared_ptr<SecondaryCache>* result);
|
|
59
73
|
|
|
60
|
-
//
|
|
61
|
-
//
|
|
62
|
-
//
|
|
63
|
-
//
|
|
64
|
-
//
|
|
65
|
-
//
|
|
66
|
-
//
|
|
67
|
-
// persistable data (typically uncompressed block), which will be written
|
|
68
|
-
// to this tier. The implementation may or may not write it to cache
|
|
69
|
-
// depending on the admission control policy, even if the return status
|
|
70
|
-
// is success (OK).
|
|
71
|
-
//
|
|
72
|
-
// If the implementation is asynchronous or otherwise uses `value` after
|
|
73
|
-
// the call returns, then InsertSaved() must be overridden not to rely on
|
|
74
|
-
// Insert(). For example, there could be a "holding area" in memory where
|
|
75
|
-
// Lookup() might return the same parsed value back. But more typically, if
|
|
76
|
-
// the implementation only uses `value` for getting persistable data during
|
|
77
|
-
// the call, then the default implementation of `InsertSaved()` suffices.
|
|
74
|
+
// Suggest inserting an entry into this cache. The caller retains ownership
|
|
75
|
+
// of `obj` (also called the "value"), so is only used directly by the
|
|
76
|
+
// SecondaryCache during Insert(). When the cache chooses to perform the
|
|
77
|
+
// suggested insertion, it uses the size_cb and saveto_cb provided by
|
|
78
|
+
// `helper` to extract the persistable data (typically an uncompressed block)
|
|
79
|
+
// and writes it to this cache tier. OK may be returned even if the insertion
|
|
80
|
+
// is not made.
|
|
78
81
|
virtual Status Insert(const Slice& key, Cache::ObjectPtr obj,
|
|
79
82
|
const Cache::CacheItemHelper* helper) = 0;
|
|
80
83
|
|
|
@@ -84,8 +87,9 @@ class SecondaryCache : public Customizable {
|
|
|
84
87
|
// may or may not write it to cache depending on the admission control
|
|
85
88
|
// policy, even if the return status is success.
|
|
86
89
|
//
|
|
87
|
-
// The default implementation assumes
|
|
88
|
-
//
|
|
90
|
+
// The default implementation only assumes the entry helper's create_cb is
|
|
91
|
+
// called at Lookup() time and not Insert() time, so should work for all
|
|
92
|
+
// foreseeable implementations.
|
|
89
93
|
virtual Status InsertSaved(const Slice& key, const Slice& saved);
|
|
90
94
|
|
|
91
95
|
// Lookup the data for the given key in this cache. The create_cb
|