@nxtedition/rocksdb 15.4.1 → 16.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +70 -23
- package/deps/rocksdb/rocksdb/.clang-tidy +86 -0
- package/deps/rocksdb/rocksdb/BUCK +42 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +11 -0
- package/deps/rocksdb/rocksdb/Makefile +59 -32
- package/deps/rocksdb/rocksdb/cache/cache.cc +0 -5
- package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +9 -9
- package/deps/rocksdb/rocksdb/cache/cache_key.cc +3 -3
- package/deps/rocksdb/rocksdb/cache/cache_key.h +5 -5
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +16 -16
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +258 -294
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +98 -49
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -5
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +2 -3
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +18 -18
- package/deps/rocksdb/rocksdb/crash_test.mk +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -22
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +6 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +14 -16
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +38 -26
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +101 -18
- package/deps/rocksdb/rocksdb/db/blob/blob_index.h +12 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +6 -9
- package/deps/rocksdb/rocksdb/db/builder.cc +23 -0
- package/deps/rocksdb/rocksdb/db/builder.h +7 -0
- package/deps/rocksdb/rocksdb/db/c.cc +373 -57
- package/deps/rocksdb/rocksdb/db/c_test.c +101 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +31 -3
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +10 -13
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +35 -48
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +13 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +201 -39
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +7 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -455
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +19 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +72 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +405 -83
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +25 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1410 -106
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +19 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +505 -45
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +9 -1
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +7 -9
- package/deps/rocksdb/rocksdb/db/convenience.cc +4 -4
- package/deps/rocksdb/rocksdb/db/convenience_impl.h +2 -1
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +60 -88
- package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +10 -12
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +471 -40
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +116 -2
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +5 -15
- package/deps/rocksdb/rocksdb/db/db_compaction_abort_test.cc +993 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +329 -29
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +155 -13
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +54 -31
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +232 -70
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +57 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +224 -31
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +4 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +164 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +47 -35
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +22 -9
- package/deps/rocksdb/rocksdb/db/db_iter.cc +9 -0
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +371 -6
- package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -5
- package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +22 -23
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +0 -2
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +40 -0
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +32 -13
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +68 -15
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +2 -3
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -21
- package/deps/rocksdb/rocksdb/db/db_test.cc +644 -128
- package/deps/rocksdb/rocksdb/db/db_test2.cc +198 -81
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +35 -10
- package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -2
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +36 -32
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +11 -7
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +499 -0
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +284 -20
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +3 -3
- package/deps/rocksdb/rocksdb/db/dbformat.h +0 -5
- package/deps/rocksdb/rocksdb/db/error_handler.cc +24 -0
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +12 -14
- package/deps/rocksdb/rocksdb/db/experimental.cc +13 -10
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +22 -3
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +21 -15
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +4 -6
- package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -3
- package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +5 -6
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +4 -2
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +17 -17
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
- package/deps/rocksdb/rocksdb/db/listener_test.cc +154 -27
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/memtable.cc +197 -51
- package/deps/rocksdb/rocksdb/db/memtable.h +6 -0
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +3 -4
- package/deps/rocksdb/rocksdb/db/merge_test.cc +37 -35
- package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/options_file_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +9 -11
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +10 -1
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +292 -15
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +10 -17
- package/deps/rocksdb/rocksdb/db/prefix_test.cc +6 -8
- package/deps/rocksdb/rocksdb/db/repair.cc +10 -10
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/table_cache.cc +142 -135
- package/deps/rocksdb/rocksdb/db/table_cache.h +30 -6
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_builder.cc +11 -50
- package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/version_edit.cc +51 -2
- package/deps/rocksdb/rocksdb/db/version_edit.h +91 -29
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_set.cc +211 -50
- package/deps/rocksdb/rocksdb/db/version_set.h +40 -3
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +294 -21
- package/deps/rocksdb/rocksdb/db/version_util.cc +96 -0
- package/deps/rocksdb/rocksdb/db/version_util.h +24 -0
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +647 -31
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +219 -1
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +549 -12
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +21 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +32 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +74 -22
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +9 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -61
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +76 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +92 -72
- package/deps/rocksdb/rocksdb/env/env.cc +1 -0
- package/deps/rocksdb/rocksdb/env/env_test.cc +365 -2
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +31 -30
- package/deps/rocksdb/rocksdb/env/io_posix.cc +8 -11
- package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
- package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -1
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +108 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +32 -4
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +4 -4
- package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
- package/deps/rocksdb/rocksdb/file/file_util.h +2 -1
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +331 -12
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +52 -35
- package/deps/rocksdb/rocksdb/folly.mk +22 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +100 -54
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +67 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +149 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +78 -97
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +17 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/io_dispatcher.h +358 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +43 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +20 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +63 -21
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +10 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +2 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -14
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +49 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +77 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +15 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +16 -10
- package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +5 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +2 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +106 -46
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +14 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +5 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -2
- package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +2 -2
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +226 -8
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +490 -0
- package/deps/rocksdb/rocksdb/memtable/skiplist.h +3 -3
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +11 -0
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +4 -12
- package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +5 -5
- package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +21 -4
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +9 -3
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +21 -2
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +2 -2
- package/deps/rocksdb/rocksdb/options/cf_options.cc +21 -1
- package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -2
- package/deps/rocksdb/rocksdb/options/db_options.cc +26 -5
- package/deps/rocksdb/rocksdb/options/db_options.h +3 -1
- package/deps/rocksdb/rocksdb/options/options.cc +5 -1
- package/deps/rocksdb/rocksdb/options/options_helper.cc +7 -2
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +109 -103
- package/deps/rocksdb/rocksdb/options/options_test.cc +14 -0
- package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +15 -17
- package/deps/rocksdb/rocksdb/port/lang.h +4 -0
- package/deps/rocksdb/rocksdb/port/port_example.h +0 -23
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +36 -0
- package/deps/rocksdb/rocksdb/port/stack_trace.h +9 -0
- package/deps/rocksdb/rocksdb/src.mk +12 -0
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +571 -292
- package/deps/rocksdb/rocksdb/table/block_based/block.h +143 -53
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +154 -90
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +5 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +51 -14
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +0 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +147 -734
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +30 -233
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +178 -108
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +13 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +17 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +70 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +168 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -9
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +7 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +9 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +548 -169
- package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_util.h +156 -0
- package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +73 -30
- package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +74 -7
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +20 -14
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +22 -12
- package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.cc +332 -0
- package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.h +133 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +3 -2
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +0 -1
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +126 -46
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +31 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +1 -2
- package/deps/rocksdb/rocksdb/table/cleanable_test.cc +3 -1
- package/deps/rocksdb/rocksdb/table/external_table.cc +25 -4
- package/deps/rocksdb/rocksdb/table/format.cc +27 -15
- package/deps/rocksdb/rocksdb/table/format.h +41 -15
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -0
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +22 -12
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +0 -1
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +7 -21
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +88 -13
- package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +53 -42
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +3 -12
- package/deps/rocksdb/rocksdb/table/table_builder.h +0 -4
- package/deps/rocksdb/rocksdb/table/table_properties.cc +18 -0
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -3
- package/deps/rocksdb/rocksdb/table/table_test.cc +848 -172
- package/deps/rocksdb/rocksdb/table/unique_id.cc +24 -20
- package/deps/rocksdb/rocksdb/table/unique_id_impl.h +8 -8
- package/deps/rocksdb/rocksdb/test_util/sync_point.h +5 -4
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -1
- package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -2
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +2 -1
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +238 -120
- package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +2 -2
- package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +2 -4
- package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +4 -8
- package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +2 -3
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +82 -20
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +41 -47
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -0
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +5 -6
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +6 -5
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +4 -4
- package/deps/rocksdb/rocksdb/tools/write_stress.cc +1 -3
- package/deps/rocksdb/rocksdb/util/atomic.h +30 -23
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +6 -7
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +3 -3
- package/deps/rocksdb/rocksdb/util/bit_fields.h +68 -46
- package/deps/rocksdb/rocksdb/util/bloom_impl.h +16 -16
- package/deps/rocksdb/rocksdb/util/coding.h +14 -27
- package/deps/rocksdb/rocksdb/util/compression.cc +365 -207
- package/deps/rocksdb/rocksdb/util/compression.h +16 -1298
- package/deps/rocksdb/rocksdb/util/compression_test.cc +347 -61
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +8 -9
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +1 -1
- package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +1 -1
- package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +3 -3
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +18 -18
- package/deps/rocksdb/rocksdb/util/gflags_compat.h +3 -3
- package/deps/rocksdb/rocksdb/util/hash_test.cc +19 -7
- package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.cc +1099 -0
- package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.h +36 -0
- package/deps/rocksdb/rocksdb/util/io_dispatcher_test.cc +1919 -0
- package/deps/rocksdb/rocksdb/util/math.h +3 -1
- package/deps/rocksdb/rocksdb/util/mutexlock.h +19 -19
- package/deps/rocksdb/rocksdb/util/ribbon_alg.h +25 -25
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +5 -7
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -5
- package/deps/rocksdb/rocksdb/util/slice.cc +0 -10
- package/deps/rocksdb/rocksdb/util/slice_test.cc +35 -1
- package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +5 -7
- package/deps/rocksdb/rocksdb/util/status.cc +3 -1
- package/deps/rocksdb/rocksdb/util/stop_watch.h +2 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -1
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +123 -78
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +12 -93
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +0 -21
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +6 -48
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +94 -307
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +12 -58
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +2 -8
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +2 -3
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +205 -811
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +18 -9
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +2 -7
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +1 -9
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +17 -11
- package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +68 -61
- package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +105 -59
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -7
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs_test.cc +94 -0
- package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +13 -17
- package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +16 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +25 -25
- package/deps/rocksdb/rocksdb/utilities/object_registry.cc +40 -40
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +2 -5
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +17 -19
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +4 -13
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +3 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_seqno_test.cc +431 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +91 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.cc +562 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.h +615 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.cc +2575 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.h +685 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_db_test.cc +2843 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.cc +567 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.h +275 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_test.cc +5183 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +4 -3
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +3 -3
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +93 -88
- package/deps/rocksdb/rocksdb.gyp +7 -0
- package/index.js +70 -10
- package/iterator.js +25 -3
- package/max_rev_operator.h +9 -5
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +0 -43
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +0 -55
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// This source code is licensed under both the GPLv2 (found in the
|
|
3
|
+
// COPYING file in the root directory) and Apache 2.0 License
|
|
4
|
+
// (found in the LICENSE.Apache file in the root directory).
|
|
5
|
+
|
|
6
|
+
#pragma once
|
|
7
|
+
|
|
8
|
+
#include <atomic>
|
|
9
|
+
#include <functional>
|
|
10
|
+
#include <memory>
|
|
11
|
+
#include <unordered_map>
|
|
12
|
+
#include <unordered_set>
|
|
13
|
+
#include <vector>
|
|
14
|
+
|
|
15
|
+
#include "rocksdb/options.h"
|
|
16
|
+
#include "rocksdb/rocksdb_namespace.h"
|
|
17
|
+
#include "rocksdb/status.h"
|
|
18
|
+
|
|
19
|
+
namespace ROCKSDB_NAMESPACE {
|
|
20
|
+
|
|
21
|
+
class FileSystem;
|
|
22
|
+
class Statistics;
|
|
23
|
+
|
|
24
|
+
// Forward declaration for internal implementation
|
|
25
|
+
struct IODispatcherImplData;
|
|
26
|
+
struct PendingPrefetchRequest;
|
|
27
|
+
|
|
28
|
+
// Options for configuring IODispatcher behavior
|
|
29
|
+
struct IODispatcherOptions {
|
|
30
|
+
// Maximum memory (in bytes) for prefetching across all ReadSets.
|
|
31
|
+
// When this limit is reached, blocks that do not fit are queued until memory is released; per Example 2 below SubmitJob() itself does not block. NOTE(review): confirm against the implementation.
|
|
32
|
+
// Set to 0 (default) for unlimited prefetch memory.
|
|
33
|
+
size_t max_prefetch_memory_bytes = 0;
|
|
34
|
+
|
|
35
|
+
// Optional statistics for tracking memory limiter metrics
|
|
36
|
+
Statistics* statistics = nullptr;
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
/*
|
|
40
|
+
* IODispatcher is a class that allows users to submit groups of IO jobs to be
|
|
41
|
+
* dispatched asynchronously (or synchronously). Upon submission the
|
|
42
|
+
* IODispatcher will return a ReadSet which acts as an ownership object of those
|
|
43
|
+
* IOs. Users read from their readset when they require the data, and either
|
|
44
|
+
* poll for completion of the block, or read synchronously if the block is not
|
|
45
|
+
* in cache at that point.
|
|
46
|
+
*
|
|
47
|
+
* ReadSets have RAII semantics, meaning on destruction they will cancel any
|
|
48
|
+
* ongoing IO, and release the underlying pinned blocks.
|
|
49
|
+
*
|
|
50
|
+
* IODispatcher's main goal is to act as a control plane for all readers using the
|
|
51
|
+
* dispatcher, allowing for rate limiting and smarter dispatching policies
|
|
52
|
+
* in the future.
|
|
53
|
+
*
|
|
54
|
+
* Example 1: Basic Usage
|
|
55
|
+
* ----------------------
|
|
56
|
+
* // Submitting an IO job and reading blocks:
|
|
57
|
+
* //
|
|
58
|
+
* // std::shared_ptr<IOJob> job = std::make_shared<IOJob>();
|
|
59
|
+
* // job->table = table_reader; // Provided BlockBasedTable*
|
|
60
|
+
* // job->job_options.io_coalesce_threshold = 32 * 1024;
|
|
61
|
+
* // job->job_options.read_options = read_options; // Provided ReadOptions
|
|
62
|
+
* //
|
|
63
|
+
* // // Populate the job with block handles (e.g., from an index/iterator)
|
|
64
|
+
* // job->block_handles.push_back(handle1);
|
|
65
|
+
* // job->block_handles.push_back(handle2);
|
|
66
|
+
* // job->block_handles.push_back(handle3);
|
|
67
|
+
* //
|
|
68
|
+
* // std::unique_ptr<IODispatcher> dispatcher(NewIODispatcher());
|
|
69
|
+
* // std::shared_ptr<ReadSet> read_set;
|
|
70
|
+
* // Status s = dispatcher->SubmitJob(job, &read_set);
|
|
71
|
+
* // if (!s.ok()) {
|
|
72
|
+
* // // Handle submit error
|
|
73
|
+
* // }
|
|
74
|
+
* //
|
|
75
|
+
* // // Read by index
|
|
76
|
+
* // for (size_t i = 0; i < job->block_handles.size(); ++i) {
|
|
77
|
+
* // CachableEntry<Block> block_entry;
|
|
78
|
+
* // Status rs = read_set->ReadIndex(i, &block_entry);
|
|
79
|
+
* // if (!rs.ok()) {
|
|
80
|
+
* // // Handle read error
|
|
81
|
+
* // continue;
|
|
82
|
+
* // }
|
|
83
|
+
* // // Use block_entry (block contents are pinned here)
|
|
84
|
+
* // }
|
|
85
|
+
* //
|
|
86
|
+
* // // Or read by byte offset
|
|
87
|
+
* // {
|
|
88
|
+
* // size_t offset =
|
|
89
|
+
* // static_cast<size_t>(job->block_handles.front().offset());
|
|
90
|
+
* // CachableEntry<Block> block_entry;
|
|
91
|
+
* // Status rs = read_set->ReadOffset(offset, &block_entry);
|
|
92
|
+
* // if (rs.ok()) {
|
|
93
|
+
* // // Use block_entry
|
|
94
|
+
* // }
|
|
95
|
+
* // }
|
|
96
|
+
* //
|
|
97
|
+
* // // Stats
|
|
98
|
+
* // uint64_t cache_hits = read_set->GetNumCacheHits();
|
|
99
|
+
* // uint64_t async_reads = read_set->GetNumAsyncReads();
|
|
100
|
+
* // uint64_t sync_reads = read_set->GetNumSyncReads();
|
|
101
|
+
*
|
|
102
|
+
* Example 2: Memory-Limited Prefetching
|
|
103
|
+
* -------------------------------------
|
|
104
|
+
* // Configure a memory budget for prefetching to prevent unbounded memory use.
|
|
105
|
+
* // When the budget is exceeded, IODispatcher uses "partial prefetch":
|
|
106
|
+
* // - Dispatches as many blocks as fit in available memory (earlier first)
|
|
107
|
+
* // - Queues remaining blocks for later dispatch when memory is released
|
|
108
|
+
* // - Never blocks on SubmitJob - remaining blocks are read on-demand
|
|
109
|
+
* //
|
|
110
|
+
* // IODispatcherOptions opts;
|
|
111
|
+
* // opts.max_prefetch_memory_bytes = 64 * 1024 * 1024; // 64MB budget
|
|
112
|
+
* // opts.statistics = db_options.statistics.get(); // Optional metrics
|
|
113
|
+
* //
|
|
114
|
+
* // std::unique_ptr<IODispatcher> dispatcher(NewIODispatcher(opts));
|
|
115
|
+
* //
|
|
116
|
+
* // // Submit a job that needs more memory than available
|
|
117
|
+
* // // Partial prefetch will dispatch what fits immediately
|
|
118
|
+
* // std::shared_ptr<ReadSet> read_set;
|
|
119
|
+
* // Status s = dispatcher->SubmitJob(job, &read_set); // Never blocks
|
|
120
|
+
* //
|
|
121
|
+
* // // Read blocks in order - earlier blocks are more likely to be prefetched
|
|
122
|
+
* // for (size_t i = 0; i < job->block_handles.size(); ++i) {
|
|
123
|
+
* // CachableEntry<Block> block;
|
|
124
|
+
* // Status rs = read_set->ReadIndex(i, &block);
|
|
125
|
+
* // // Use block...
|
|
126
|
+
* //
|
|
127
|
+
* // // Release block when done to free memory for pending prefetches
|
|
128
|
+
* // read_set->ReleaseBlock(i); // Triggers dispatch of queued blocks
|
|
129
|
+
* // }
|
|
130
|
+
* //
|
|
131
|
+
* // Memory limiting statistics (when statistics is configured):
|
|
132
|
+
* // - PREFETCH_MEMORY_BYTES_GRANTED: Total bytes acquired for prefetching
|
|
133
|
+
* // - PREFETCH_MEMORY_BYTES_RELEASED: Total bytes released after use
|
|
134
|
+
* // - PREFETCH_MEMORY_REQUESTS_BLOCKED: Number of blocks that couldn't be
|
|
135
|
+
* // prefetched immediately due to memory pressure
|
|
136
|
+
|
|
137
|
+
*/
|
|
138
|
+
|
|
139
|
+
class BlockHandle;
|
|
140
|
+
struct ReadOptions;
|
|
141
|
+
struct AsyncIOState;
|
|
142
|
+
|
|
143
|
+
template <typename T>
|
|
144
|
+
class CachableEntry;
|
|
145
|
+
class Block;
|
|
146
|
+
class BlockBasedTable;
|
|
147
|
+
|
|
148
|
+
struct JobOptions {
|
|
149
|
+
uint64_t io_coalesce_threshold = 16 * 1024;
|
|
150
|
+
ReadOptions read_options;
|
|
151
|
+
};
|
|
152
|
+
|
|
153
|
+
class IOJob {
|
|
154
|
+
public:
|
|
155
|
+
std::vector<BlockHandle> block_handles;
|
|
156
|
+
|
|
157
|
+
// Table reader for accessing block cache and index
|
|
158
|
+
BlockBasedTable* table = nullptr;
|
|
159
|
+
|
|
160
|
+
// Job execution options
|
|
161
|
+
JobOptions job_options;
|
|
162
|
+
};
|
|
163
|
+
|
|
164
|
+
/*
|
|
165
|
+
* ReadSet represents a set of blocks that may be in cache, being read
|
|
166
|
+
* asynchronously, or need to be read synchronously. Each Read*() method
|
|
167
|
+
* transparently handles all three cases.
|
|
168
|
+
*/
|
|
169
|
+
class ReadSet {
|
|
170
|
+
public:
|
|
171
|
+
ReadSet() = default;
|
|
172
|
+
~ReadSet();
|
|
173
|
+
|
|
174
|
+
ReadSet(const ReadSet&) = delete;
|
|
175
|
+
ReadSet& operator=(const ReadSet&) = delete;
|
|
176
|
+
ReadSet(ReadSet&&) noexcept = delete;
|
|
177
|
+
ReadSet& operator=(ReadSet&&) noexcept = delete;
|
|
178
|
+
|
|
179
|
+
// Read a block by index
|
|
180
|
+
// - If the block is in cache, returns it immediately
|
|
181
|
+
// - If the block is being read asynchronously, polls for completion and
|
|
182
|
+
// returns it
|
|
183
|
+
// - If the block needs to be read, performs a synchronous read and returns it
|
|
184
|
+
//
|
|
185
|
+
// block_index: Index into the original IOJob's block_handles vector
|
|
186
|
+
// out: Output parameter for the pinned block entry
|
|
187
|
+
//
|
|
188
|
+
// Returns: Status::OK() on success, error status otherwise
|
|
189
|
+
Status ReadIndex(size_t block_index, CachableEntry<Block>* out);
|
|
190
|
+
// Read a block by offset
|
|
191
|
+
// - If the block is in cache, returns it immediately
|
|
192
|
+
// - If the block is being read asynchronously, polls for completion and
|
|
193
|
+
// returns it
|
|
194
|
+
// - If the block needs to be read, performs a synchronous read and returns it
|
|
195
|
+
|
|
196
|
+
// offset: Byte offset into the SST file of the block.
|
|
197
|
+
|
|
198
|
+
// out: Output parameter for the pinned block entry
|
|
199
|
+
Status ReadOffset(size_t offset, CachableEntry<Block>* out);
|
|
200
|
+
|
|
201
|
+
// Release a block by index, unpinning it from cache.
|
|
202
|
+
// After this call, ReadIndex() for this block will return an error.
|
|
203
|
+
// This is useful for eager memory reclamation when blocks are no longer
|
|
204
|
+
// needed.
|
|
205
|
+
void ReleaseBlock(size_t block_index);
|
|
206
|
+
|
|
207
|
+
// Check if a block at the given index is still available (not released).
|
|
208
|
+
// Returns true if the block can be read, false otherwise.
|
|
209
|
+
bool IsBlockAvailable(size_t block_index) const;
|
|
210
|
+
|
|
211
|
+
// Statistics accessors
|
|
212
|
+
uint64_t GetNumSyncReads() const { return num_sync_reads_; }
|
|
213
|
+
uint64_t GetNumAsyncReads() const { return num_async_reads_; }
|
|
214
|
+
uint64_t GetNumCacheHits() const { return num_cache_hits_; }
|
|
215
|
+
|
|
216
|
+
private:
|
|
217
|
+
friend class IODispatcherImpl;
|
|
218
|
+
|
|
219
|
+
// Job data
|
|
220
|
+
std::shared_ptr<IOJob> job_;
|
|
221
|
+
|
|
222
|
+
// FileSystem for calling AbortIO in destructor
|
|
223
|
+
std::shared_ptr<FileSystem> fs_;
|
|
224
|
+
|
|
225
|
+
// Storage for pinned blocks (one per block handle in the job)
|
|
226
|
+
std::vector<CachableEntry<Block>> pinned_blocks_;
|
|
227
|
+
|
|
228
|
+
// Sorted index for binary search in ReadOffset.
|
|
229
|
+
// sorted_block_indices_[i] is the original index of the i-th smallest block
|
|
230
|
+
// by offset. Built once during SubmitJob for O(log n) ReadOffset lookups.
|
|
231
|
+
std::vector<size_t> sorted_block_indices_;
|
|
232
|
+
|
|
233
|
+
// Map from block index to async IO state for blocks being read
|
|
234
|
+
// asynchronously. Multiple block indices may map to the same async state when
|
|
235
|
+
// blocks are coalesced into a single IO request.
|
|
236
|
+
std::unordered_map<size_t, std::shared_ptr<AsyncIOState>> async_io_map_;
|
|
237
|
+
|
|
238
|
+
// For memory release notifications back to dispatcher (weak ref to avoid
|
|
239
|
+
// cycles)
|
|
240
|
+
std::weak_ptr<IODispatcherImplData> dispatcher_data_;
|
|
241
|
+
|
|
242
|
+
// Size of each block (parallel to pinned_blocks_) for memory accounting
|
|
243
|
+
std::vector<size_t> block_sizes_;
|
|
244
|
+
|
|
245
|
+
// Statistics counters
|
|
246
|
+
std::atomic<uint64_t> num_sync_reads_ = 0;
|
|
247
|
+
std::atomic<uint64_t> num_async_reads_ = 0;
|
|
248
|
+
std::atomic<uint64_t> num_cache_hits_ = 0;
|
|
249
|
+
|
|
250
|
+
// Poll and process a specific async IO request
|
|
251
|
+
Status PollAndProcessAsyncIO(
|
|
252
|
+
const std::shared_ptr<AsyncIOState>& async_state);
|
|
253
|
+
|
|
254
|
+
// Perform synchronous read for a specific block
|
|
255
|
+
Status SyncRead(size_t block_index);
|
|
256
|
+
|
|
257
|
+
// Remove a block from pending prefetch (called by ReadIndex/ReleaseBlock)
|
|
258
|
+
void RemoveFromPending(size_t block_index);
|
|
259
|
+
|
|
260
|
+
// Atomic flags indicating if block is pending prefetch (lock-free check)
|
|
261
|
+
std::unique_ptr<std::atomic<bool>[]> pending_prefetch_flags_;
|
|
262
|
+
size_t pending_prefetch_flags_size_ = 0;
|
|
263
|
+
|
|
264
|
+
// Reference to pending request (for removal notification)
|
|
265
|
+
std::shared_ptr<PendingPrefetchRequest> pending_request_;
|
|
266
|
+
};
|
|
267
|
+
|
|
268
|
+
/*
|
|
269
|
+
* IODispatcher handles IO operations synchronously or asynchronously based
|
|
270
|
+
* on JobOptions. When async is true, it uses ReadAsync; when false, it uses
|
|
271
|
+
* standard synchronous reads.
|
|
272
|
+
* */
|
|
273
|
+
class IODispatcher {
|
|
274
|
+
protected:
|
|
275
|
+
IODispatcher() = default;
|
|
276
|
+
|
|
277
|
+
public:
|
|
278
|
+
virtual ~IODispatcher() {}
|
|
279
|
+
|
|
280
|
+
IODispatcher(const IODispatcher&) = delete;
|
|
281
|
+
IODispatcher& operator=(const IODispatcher&) = delete;
|
|
282
|
+
IODispatcher(IODispatcher&&) = delete;
|
|
283
|
+
IODispatcher& operator=(IODispatcher&&) = delete;
|
|
284
|
+
|
|
285
|
+
// Submit a job for IO processing
|
|
286
|
+
// job: The IO job to submit
|
|
287
|
+
// read_set: Output parameter that will be populated with the ReadSet on
|
|
288
|
+
// success. Returns: Status::OK() on success, error status otherwise
|
|
289
|
+
virtual Status SubmitJob(const std::shared_ptr<IOJob>& job,
|
|
290
|
+
std::shared_ptr<ReadSet>* read_set) = 0;
|
|
291
|
+
};
|
|
292
|
+
|
|
293
|
+
// Create IODispatcher with default options (no memory limit)
|
|
294
|
+
IODispatcher* NewIODispatcher();
|
|
295
|
+
|
|
296
|
+
// Create IODispatcher with custom options
|
|
297
|
+
IODispatcher* NewIODispatcher(const IODispatcherOptions& options);
|
|
298
|
+
|
|
299
|
+
// TrackingIODispatcher wraps another IODispatcher and tracks all ReadSets
|
|
300
|
+
// created. This is useful for testing to verify IO statistics.
|
|
301
|
+
class TrackingIODispatcher : public IODispatcher {
|
|
302
|
+
public:
|
|
303
|
+
TrackingIODispatcher() : impl_(NewIODispatcher()) {}
|
|
304
|
+
explicit TrackingIODispatcher(IODispatcher* impl) : impl_(impl) {}
|
|
305
|
+
|
|
306
|
+
Status SubmitJob(const std::shared_ptr<IOJob>& job,
|
|
307
|
+
std::shared_ptr<ReadSet>* read_set) override {
|
|
308
|
+
Status s = impl_->SubmitJob(job, read_set);
|
|
309
|
+
if (s.ok() && read_set && *read_set) {
|
|
310
|
+
read_sets_.push_back(*read_set);
|
|
311
|
+
}
|
|
312
|
+
return s;
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
// Get all ReadSets created by this dispatcher
|
|
316
|
+
const std::vector<std::shared_ptr<ReadSet>>& GetReadSets() const {
|
|
317
|
+
return read_sets_;
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
// Get aggregated statistics from all ReadSets
|
|
321
|
+
uint64_t GetTotalSyncReads() const {
|
|
322
|
+
uint64_t total = 0;
|
|
323
|
+
for (const auto& rs : read_sets_) {
|
|
324
|
+
total += rs->GetNumSyncReads();
|
|
325
|
+
}
|
|
326
|
+
return total;
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
uint64_t GetTotalAsyncReads() const {
|
|
330
|
+
uint64_t total = 0;
|
|
331
|
+
for (const auto& rs : read_sets_) {
|
|
332
|
+
total += rs->GetNumAsyncReads();
|
|
333
|
+
}
|
|
334
|
+
return total;
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
uint64_t GetTotalCacheHits() const {
|
|
338
|
+
uint64_t total = 0;
|
|
339
|
+
for (const auto& rs : read_sets_) {
|
|
340
|
+
total += rs->GetNumCacheHits();
|
|
341
|
+
}
|
|
342
|
+
return total;
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
// Get total IO operations (sum of all types)
|
|
346
|
+
uint64_t GetTotalIOOperations() const {
|
|
347
|
+
return GetTotalSyncReads() + GetTotalAsyncReads() + GetTotalCacheHits();
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
// Clear tracked ReadSets
|
|
351
|
+
void ClearReadSets() { read_sets_.clear(); }
|
|
352
|
+
|
|
353
|
+
private:
|
|
354
|
+
std::unique_ptr<IODispatcher> impl_;
|
|
355
|
+
std::vector<std::shared_ptr<ReadSet>> read_sets_;
|
|
356
|
+
};
|
|
357
|
+
|
|
358
|
+
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -38,6 +38,10 @@ struct FileIOByTemperature {
|
|
|
38
38
|
uint64_t cold_file_bytes_read;
|
|
39
39
|
// the number of bytes read to Temperature::kIce file
|
|
40
40
|
uint64_t ice_file_bytes_read;
|
|
41
|
+
// the number of bytes read to Temperature::kUnknown file not in last level
|
|
42
|
+
uint64_t unknown_non_last_level_bytes_read;
|
|
43
|
+
// the number of bytes read to Temperature::kUnknown file in last level
|
|
44
|
+
uint64_t unknown_last_level_bytes_read;
|
|
41
45
|
// total number of reads to Temperature::kHot file
|
|
42
46
|
uint64_t hot_file_read_count;
|
|
43
47
|
// total number of reads to Temperature::kWarm file
|
|
@@ -48,6 +52,11 @@ struct FileIOByTemperature {
|
|
|
48
52
|
uint64_t cold_file_read_count;
|
|
49
53
|
// total number of reads to Temperature::kIce file
|
|
50
54
|
uint64_t ice_file_read_count;
|
|
55
|
+
// total number of reads to Temperature::kUnknown file not in last level
|
|
56
|
+
uint64_t unknown_non_last_level_read_count;
|
|
57
|
+
// total number of reads to Temperature::kUnknown file in last level
|
|
58
|
+
uint64_t unknown_last_level_read_count;
|
|
59
|
+
|
|
51
60
|
// reset all the statistics to 0.
|
|
52
61
|
void Reset() {
|
|
53
62
|
hot_file_bytes_read = 0;
|
|
@@ -55,11 +64,15 @@ struct FileIOByTemperature {
|
|
|
55
64
|
cool_file_bytes_read = 0;
|
|
56
65
|
cold_file_bytes_read = 0;
|
|
57
66
|
ice_file_bytes_read = 0;
|
|
67
|
+
unknown_non_last_level_bytes_read = 0;
|
|
68
|
+
unknown_last_level_bytes_read = 0;
|
|
58
69
|
hot_file_read_count = 0;
|
|
59
70
|
warm_file_read_count = 0;
|
|
60
71
|
cool_file_read_count = 0;
|
|
61
72
|
cold_file_read_count = 0;
|
|
62
73
|
ice_file_read_count = 0;
|
|
74
|
+
unknown_non_last_level_read_count = 0;
|
|
75
|
+
unknown_last_level_read_count = 0;
|
|
63
76
|
}
|
|
64
77
|
};
|
|
65
78
|
|
|
@@ -201,6 +201,7 @@ enum class BackgroundErrorReason {
|
|
|
201
201
|
kManifestWrite,
|
|
202
202
|
kFlushNoWAL,
|
|
203
203
|
kManifestWriteNoWAL,
|
|
204
|
+
kAsyncFileOpen,
|
|
204
205
|
};
|
|
205
206
|
|
|
206
207
|
struct WriteStallInfo {
|
|
@@ -488,6 +489,9 @@ struct CompactionJobInfo {
|
|
|
488
489
|
// Information about blob files deleted during compaction in Integrated
|
|
489
490
|
// BlobDB.
|
|
490
491
|
std::vector<BlobFileGarbageInfo> blob_file_garbage_infos;
|
|
492
|
+
|
|
493
|
+
// Whether this compaction was aborted via AbortAllCompactions()
|
|
494
|
+
bool aborted = false;
|
|
491
495
|
};
|
|
492
496
|
|
|
493
497
|
struct MemTableInfo {
|
|
@@ -550,6 +554,34 @@ struct IOErrorInfo {
|
|
|
550
554
|
uint64_t offset;
|
|
551
555
|
};
|
|
552
556
|
|
|
557
|
+
// EXPERIMENTAL — under active development, fields may change.
|
|
558
|
+
// Point-in-time snapshot of background job pressure for one DB: how busy
|
|
559
|
+
// compaction and flush are, and how close the DB is to write-stalling.
|
|
560
|
+
struct BackgroundJobPressure {
|
|
561
|
+
// Compaction scheduling (LOW + BOTTOM priority combined)
|
|
562
|
+
int compaction_scheduled = 0;
|
|
563
|
+
int compaction_running = 0;
|
|
564
|
+
|
|
565
|
+
// Per-priority compaction breakdown
|
|
566
|
+
int compaction_low_scheduled = 0;
|
|
567
|
+
int compaction_low_running = 0;
|
|
568
|
+
int compaction_bottom_scheduled = 0;
|
|
569
|
+
int compaction_bottom_running = 0;
|
|
570
|
+
|
|
571
|
+
// Flush scheduling
|
|
572
|
+
int flush_scheduled = 0;
|
|
573
|
+
int flush_running = 0;
|
|
574
|
+
|
|
575
|
+
// How close the DB is to a write stall, as a percentage (0 = healthy,
|
|
576
|
+
// 100 = at stall threshold). Can exceed 100 when already stalling.
|
|
577
|
+
// Max across all column families based on write-stall triggers.
|
|
578
|
+
int write_stall_proximity_pct = 0;
|
|
579
|
+
// Whether RocksDB has activated compaction speedup due to write pressure
|
|
580
|
+
bool compaction_speedup_active = false;
|
|
581
|
+
|
|
582
|
+
bool operator==(const BackgroundJobPressure&) const = default;
|
|
583
|
+
};
|
|
584
|
+
|
|
553
585
|
// EventListener class contains a set of callback functions that will
|
|
554
586
|
// be called when specific RocksDB event happens such as flush. It can
|
|
555
587
|
// be used as a building block for developing custom features such as
|
|
@@ -866,6 +898,17 @@ class EventListener : public Customizable {
|
|
|
866
898
|
// happens. ShouldBeNotifiedOnFileIO should be set to true to get a callback.
|
|
867
899
|
virtual void OnIOError(const IOErrorInfo& /*info*/) {}
|
|
868
900
|
|
|
901
|
+
// EXPERIMENTAL
|
|
902
|
+
// Called after a flush or compaction background job completes, providing a
|
|
903
|
+
// snapshot of current background job scheduling pressure and write-stall
|
|
904
|
+
// proximity. Fires on the background thread that completed the job, without
|
|
905
|
+
// holding db_mutex_. This callback fires on every completion, even if
|
|
906
|
+
// pressure values have not changed from the previous call.
|
|
907
|
+
// Implementations should not run for an extended period of time before
|
|
908
|
+
// returning, as this blocks RocksDB background work.
|
|
909
|
+
virtual void OnBackgroundJobPressureChanged(
|
|
910
|
+
DB* /*db*/, const BackgroundJobPressure& /*pressure*/) {}
|
|
911
|
+
|
|
869
912
|
~EventListener() override {}
|
|
870
913
|
};
|
|
871
914
|
|
|
@@ -211,6 +211,26 @@ class MemTableRep {
|
|
|
211
211
|
return Status::NotSupported("GetAndValidate() not implemented.");
|
|
212
212
|
}
|
|
213
213
|
|
|
214
|
+
// Batch lookup of multiple sorted keys. For each key, finds the first
|
|
215
|
+
// matching entry and calls callback_func with callback_args[i] and the
|
|
216
|
+
// entry. Continues calling for subsequent entries until callback_func
|
|
217
|
+
// returns false.
|
|
218
|
+
//
|
|
219
|
+
// Keys must be memtable-encoded and in non-decreasing order. Implementations
|
|
220
|
+
// may exploit the sorted key order for more efficient lookups.
|
|
221
|
+
//
|
|
222
|
+
// When detect_key_out_of_order is true, validates key ordering during
|
|
223
|
+
// traversal and returns Corruption if out-of-order keys are found.
|
|
224
|
+
// When key_validation_callback is non-null, calls it on each visited node.
|
|
225
|
+
//
|
|
226
|
+
// Default implementation calls Get() per key via an iterator.
|
|
227
|
+
virtual Status MultiGet(
|
|
228
|
+
size_t num_keys, const char* const* keys, void** callback_args,
|
|
229
|
+
bool (*callback_func)(void* arg, const char* entry),
|
|
230
|
+
bool allow_data_in_errors = false, bool detect_key_out_of_order = false,
|
|
231
|
+
const std::function<Status(const char*, bool)>& key_validation_callback =
|
|
232
|
+
nullptr);
|
|
233
|
+
|
|
214
234
|
virtual uint64_t ApproximateNumEntries(const Slice& /*start_ikey*/,
|
|
215
235
|
const Slice& /*end_key*/) {
|
|
216
236
|
return 0;
|
|
@@ -58,6 +58,7 @@ class InternalKeyComparator;
|
|
|
58
58
|
class WalFilter;
|
|
59
59
|
class FileSystem;
|
|
60
60
|
class UserDefinedIndexFactory;
|
|
61
|
+
class IODispatcher;
|
|
61
62
|
|
|
62
63
|
struct Options;
|
|
63
64
|
struct DbPath;
|
|
@@ -304,9 +305,6 @@ struct ColumnFamilyOptions : public AdvancedColumnFamilyOptions {
|
|
|
304
305
|
// Dynamically changeable through SetOptions() API
|
|
305
306
|
uint64_t max_bytes_for_level_base = 256 * 1048576;
|
|
306
307
|
|
|
307
|
-
// Deprecated.
|
|
308
|
-
uint64_t snap_refresh_nanos = 0;
|
|
309
|
-
|
|
310
308
|
// Disable automatic compactions. Manual compactions can still
|
|
311
309
|
// be issued on this column family
|
|
312
310
|
//
|
|
@@ -785,6 +783,25 @@ struct DBOptions {
|
|
|
785
783
|
// Default: 16
|
|
786
784
|
int max_file_opening_threads = 16;
|
|
787
785
|
|
|
786
|
+
// If true, SST files are opened and validated asynchronously in the
|
|
787
|
+
// background after DB::Open returns. This reduces DB open time for
|
|
788
|
+
// databases with many SST files and high latency file systems. Mostly useful
|
|
789
|
+
// when max_open_files = -1, as max_open_files != -1 usually has fast open
|
|
790
|
+
// times. See also `max_file_opening_threads` and
|
|
791
|
+
// `skip_stats_update_on_db_open` to improve file open latency.
|
|
792
|
+
//
|
|
793
|
+
// Note: This option is currently not compatible with FIFO compaction and
|
|
794
|
+
// requires skip_stats_update_on_db_open=true.
|
|
795
|
+
//
|
|
796
|
+
// Errors will no longer show up in DB::Open, but instead can show up as
|
|
797
|
+
// either background errors and/or operations that access the file (e.g.
|
|
798
|
+
// reads, compactions).
|
|
799
|
+
//
|
|
800
|
+
// When false (default), SST files are opened and validated during DB::Open.
|
|
801
|
+
//
|
|
802
|
+
// Default: false
|
|
803
|
+
bool open_files_async = false;
|
|
804
|
+
|
|
788
805
|
// Once write-ahead logs exceed this size, we will start forcing the flush of
|
|
789
806
|
// column families whose memtables are backed by the oldest live WAL file
|
|
790
807
|
// (i.e. the ones that are causing all the space amplification). If set to 0
|
|
@@ -977,6 +994,13 @@ struct DBOptions {
|
|
|
977
994
|
// manifest write (e.g. completed DB compaction or flush).
|
|
978
995
|
uint64_t max_manifest_file_size = 1024 * 1024 * 1024;
|
|
979
996
|
|
|
997
|
+
// If true, on DB close, read back the entire MANIFEST file and validate
|
|
998
|
+
// CRC checksums and logical record content. If corruption is detected,
|
|
999
|
+
// a fresh MANIFEST is written from in-memory state before closing.
|
|
1000
|
+
//
|
|
1001
|
+
// This option is mutable with SetDBOptions().
|
|
1002
|
+
bool verify_manifest_content_on_close = false;
|
|
1003
|
+
|
|
980
1004
|
// This option mostly replaces max_manifest_file_size to control an auto-tuned
|
|
981
1005
|
// balance of manifest write amplification and space amplification. A new
|
|
982
1006
|
// manifest file is created with the "compacted" contents of the old one when
|
|
@@ -1362,17 +1386,6 @@ struct DBOptions {
|
|
|
1362
1386
|
// Default: false
|
|
1363
1387
|
bool skip_stats_update_on_db_open = false;
|
|
1364
1388
|
|
|
1365
|
-
// This option is deprecated and marked as no-op. Kept for backward
|
|
1366
|
-
// compatibility until usage is fully removed.
|
|
1367
|
-
// File size check will be performed through a thread
|
|
1368
|
-
// pool during DB Open, when max_open_files is set to -1.
|
|
1369
|
-
// Therefore, the concern of DB Open slowness is eliminated.
|
|
1370
|
-
// Note that when max_open_files is not set to -1, only a subset of files will
|
|
1371
|
-
// be opened and checked during DB Open.
|
|
1372
|
-
//
|
|
1373
|
-
// Default: false
|
|
1374
|
-
bool skip_checking_sst_file_sizes_on_db_open = false;
|
|
1375
|
-
|
|
1376
1389
|
// Recovery mode to control the consistency while replaying WAL
|
|
1377
1390
|
// Default: kPointInTimeRecovery
|
|
1378
1391
|
WALRecoveryMode wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
|
|
@@ -1405,9 +1418,30 @@ struct DBOptions {
|
|
|
1405
1418
|
// WAL logs will be kept, so that if crash happened before flush, we still
|
|
1406
1419
|
// have logs to recover from.
|
|
1407
1420
|
//
|
|
1421
|
+
// Note: when `enforce_write_buffer_manager_during_recovery` is also enabled,
|
|
1422
|
+
// flushes may still occur during recovery to respect the
|
|
1423
|
+
// WriteBufferManager's global memory limit, even if this option is true.
|
|
1424
|
+
// Once any such WBM-triggered flush happens, all remaining memtables will
|
|
1425
|
+
// also be flushed at the end of recovery (similar to the behavior when this
|
|
1426
|
+
// option is false).
|
|
1427
|
+
//
|
|
1408
1428
|
// DEFAULT: false
|
|
1409
1429
|
bool avoid_flush_during_recovery = false;
|
|
1410
1430
|
|
|
1431
|
+
// If true and a WriteBufferManager is configured, RocksDB will check
|
|
1432
|
+
// WriteBufferManager::ShouldFlush() during WAL recovery and schedule
|
|
1433
|
+
// flushes when needed. This prevents OOM when multiple RocksDB instances
|
|
1434
|
+
// share a WriteBufferManager and one instance is recovering from WAL.
|
|
1435
|
+
//
|
|
1436
|
+
// When triggered, all column families with non-empty memtables are scheduled
|
|
1437
|
+
// for flush, which may produce smaller L0 files in some column families.
|
|
1438
|
+
// This also overrides `avoid_flush_during_recovery`: once a WBM-triggered
|
|
1439
|
+
// flush occurs mid-recovery, all remaining non-empty memtables will be
|
|
1440
|
+
// flushed at the end of recovery as well.
|
|
1441
|
+
//
|
|
1442
|
+
// DEFAULT: true
|
|
1443
|
+
bool enforce_write_buffer_manager_during_recovery = true;
|
|
1444
|
+
|
|
1411
1445
|
// By default RocksDB will flush all memtables on DB close if there are
|
|
1412
1446
|
// unpersisted data (i.e. with WAL disabled). The flush can be skipped to speed up
|
|
1413
1447
|
// DB close. Unpersisted data WILL BE LOST.
|
|
@@ -1847,11 +1881,13 @@ class MultiScanArgs {
|
|
|
1847
1881
|
io_coalesce_threshold = other.io_coalesce_threshold;
|
|
1848
1882
|
max_prefetch_size = other.max_prefetch_size;
|
|
1849
1883
|
use_async_io = other.use_async_io;
|
|
1884
|
+
io_dispatcher = other.io_dispatcher;
|
|
1850
1885
|
}
|
|
1851
1886
|
MultiScanArgs(MultiScanArgs&& other) noexcept
|
|
1852
1887
|
: io_coalesce_threshold(other.io_coalesce_threshold),
|
|
1853
1888
|
max_prefetch_size(other.max_prefetch_size),
|
|
1854
1889
|
use_async_io(other.use_async_io),
|
|
1890
|
+
io_dispatcher(std::move(other.io_dispatcher)),
|
|
1855
1891
|
comp_(other.comp_),
|
|
1856
1892
|
original_ranges_(std::move(other.original_ranges_)) {}
|
|
1857
1893
|
|
|
@@ -1861,6 +1897,7 @@ class MultiScanArgs {
|
|
|
1861
1897
|
io_coalesce_threshold = other.io_coalesce_threshold;
|
|
1862
1898
|
max_prefetch_size = other.max_prefetch_size;
|
|
1863
1899
|
use_async_io = other.use_async_io;
|
|
1900
|
+
io_dispatcher = other.io_dispatcher;
|
|
1864
1901
|
return *this;
|
|
1865
1902
|
}
|
|
1866
1903
|
|
|
@@ -1871,6 +1908,7 @@ class MultiScanArgs {
|
|
|
1871
1908
|
io_coalesce_threshold = other.io_coalesce_threshold;
|
|
1872
1909
|
max_prefetch_size = other.max_prefetch_size;
|
|
1873
1910
|
use_async_io = other.use_async_io;
|
|
1911
|
+
io_dispatcher = std::move(other.io_dispatcher);
|
|
1874
1912
|
}
|
|
1875
1913
|
return *this;
|
|
1876
1914
|
}
|
|
@@ -1918,6 +1956,7 @@ class MultiScanArgs {
|
|
|
1918
1956
|
io_coalesce_threshold = other.io_coalesce_threshold;
|
|
1919
1957
|
max_prefetch_size = other.max_prefetch_size;
|
|
1920
1958
|
use_async_io = other.use_async_io;
|
|
1959
|
+
io_dispatcher = other.io_dispatcher;
|
|
1921
1960
|
}
|
|
1922
1961
|
|
|
1923
1962
|
uint64_t io_coalesce_threshold = 16 << 10; // 16KB by default
|
|
@@ -1939,6 +1978,12 @@ class MultiScanArgs {
|
|
|
1939
1978
|
// When false, it will use synchronous MultiRead().
|
|
1940
1979
|
bool use_async_io = false;
|
|
1941
1980
|
|
|
1981
|
+
// Optional IODispatcher for multi-scan operations.
|
|
1982
|
+
// If nullptr (default), a new IODispatcher is created internally.
|
|
1983
|
+
// Users can provide their own IODispatcher for custom IO scheduling
|
|
1984
|
+
// or for testing/monitoring purposes (e.g., to check IO statistics).
|
|
1985
|
+
std::shared_ptr<IODispatcher> io_dispatcher = nullptr;
|
|
1986
|
+
|
|
1942
1987
|
private:
|
|
1943
1988
|
// The comparator used for ordering ranges
|
|
1944
1989
|
const Comparator* comp_;
|
|
@@ -2108,10 +2153,6 @@ struct ReadOptions {
|
|
|
2108
2153
|
// that were inserted into the database after the creation of the iterator.
|
|
2109
2154
|
bool tailing = false;
|
|
2110
2155
|
|
|
2111
|
-
// This options is not used anymore. It was to turn on a functionality that
|
|
2112
|
-
// has been removed. DEPRECATED
|
|
2113
|
-
bool managed = false;
|
|
2114
|
-
|
|
2115
2156
|
// Enable a total order seek regardless of index format (e.g. hash index)
|
|
2116
2157
|
// used in the table. Some table format (e.g. plain table) may not support
|
|
2117
2158
|
// this option.
|
|
@@ -2241,9 +2282,10 @@ struct ReadOptions {
|
|
|
2241
2282
|
// block based table index. The table_factory used for the column family
|
|
2242
2283
|
// must support building/reading this index.
|
|
2243
2284
|
//
|
|
2244
|
-
//
|
|
2245
|
-
//
|
|
2246
|
-
//
|
|
2285
|
+
// Forward scans (SeekToFirst, Seek, Next) and point lookups (Get) are
|
|
2286
|
+
// supported. Reverse operations (SeekToLast, SeekForPrev, Prev) are not
|
|
2287
|
+
// yet supported and will return NotSupported when this is set. Leave this
|
|
2288
|
+
// null to use the native index for reverse operations.
|
|
2247
2289
|
const UserDefinedIndexFactory* table_index_factory = nullptr;
|
|
2248
2290
|
|
|
2249
2291
|
// *** END options only relevant to iterators or scans ***
|