@nxtedition/rocksdb 8.2.0 → 8.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +3 -3
- package/deps/rocksdb/rocksdb/CMakeLists.txt +16 -52
- package/deps/rocksdb/rocksdb/Makefile +10 -5
- package/deps/rocksdb/rocksdb/TARGETS +8 -345
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +92 -0
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +32 -32
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +12 -9
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +6 -43
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +3 -13
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +8 -5
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +21 -47
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +3 -8
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +2 -1
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +1 -2
- package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +44 -7
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +13 -14
- package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +2 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +17 -8
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +40 -21
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +41 -42
- package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +5 -4
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +2 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +5 -3
- package/deps/rocksdb/rocksdb/db/builder.cc +7 -6
- package/deps/rocksdb/rocksdb/db/builder.h +2 -2
- package/deps/rocksdb/rocksdb/db/c.cc +76 -5
- package/deps/rocksdb/rocksdb/db/c_test.c +141 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +32 -0
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +3 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +5 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +12 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +21 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +8 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +3 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +77 -50
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +4 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +55 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +142 -56
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +1 -2
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +21 -20
- package/deps/rocksdb/rocksdb/db/convenience.cc +8 -6
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +5 -4
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +6 -3
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +260 -220
- package/deps/rocksdb/rocksdb/db/db_clip_test.cc +142 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +333 -27
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +7 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +189 -27
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +23 -10
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +134 -90
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +2 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +5 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +124 -16
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +10 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +7 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +15 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +11 -5
- package/deps/rocksdb/rocksdb/db/db_iter.cc +7 -8
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +54 -3
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +42 -0
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +116 -1
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +3 -2
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +3 -2
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +9 -8
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +142 -63
- package/deps/rocksdb/rocksdb/db/db_test.cc +28 -7
- package/deps/rocksdb/rocksdb/db/db_test2.cc +71 -131
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +18 -0
- package/deps/rocksdb/rocksdb/db/db_test_util.h +6 -0
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +10 -10
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +25 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +88 -0
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +67 -0
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +5 -0
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/experimental.cc +4 -2
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +86 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +15 -2
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +1 -2
- package/deps/rocksdb/rocksdb/db/flush_job.cc +21 -14
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +14 -7
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +31 -8
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +21 -19
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +42 -12
- package/deps/rocksdb/rocksdb/db/internal_stats.h +1 -0
- package/deps/rocksdb/rocksdb/db/kv_checksum.h +92 -6
- package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/log_format.h +8 -4
- package/deps/rocksdb/rocksdb/db/log_reader.cc +129 -51
- package/deps/rocksdb/rocksdb/db/log_reader.h +16 -0
- package/deps/rocksdb/rocksdb/db/log_test.cc +125 -4
- package/deps/rocksdb/rocksdb/db/log_writer.cc +32 -2
- package/deps/rocksdb/rocksdb/db/log_writer.h +16 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +17 -46
- package/deps/rocksdb/rocksdb/db/memtable.h +1 -1
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +8 -4
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -1
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +5 -4
- package/deps/rocksdb/rocksdb/db/repair.cc +38 -11
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +3 -3
- package/deps/rocksdb/rocksdb/db/table_cache.cc +68 -51
- package/deps/rocksdb/rocksdb/db/table_cache.h +20 -10
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -1
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +6 -3
- package/deps/rocksdb/rocksdb/db/version_builder.cc +9 -5
- package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +140 -120
- package/deps/rocksdb/rocksdb/db/version_edit.cc +14 -0
- package/deps/rocksdb/rocksdb/db/version_edit.h +12 -4
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +21 -13
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +26 -16
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +9 -9
- package/deps/rocksdb/rocksdb/db/version_set.cc +292 -96
- package/deps/rocksdb/rocksdb/db/version_set.h +53 -28
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -0
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +62 -22
- package/deps/rocksdb/rocksdb/db/version_util.h +5 -4
- package/deps/rocksdb/rocksdb/db/write_batch.cc +3 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +119 -27
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +123 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +7 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +34 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +13 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +43 -33
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +29 -17
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +6 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +85 -50
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +96 -54
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.cc +122 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +206 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +9 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +9 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +322 -92
- package/deps/rocksdb/rocksdb/env/env_posix.cc +12 -8
- package/deps/rocksdb/rocksdb/env/env_test.cc +31 -0
- package/deps/rocksdb/rocksdb/env/mock_env.cc +1 -1
- package/deps/rocksdb/rocksdb/env/unique_id_gen.h +14 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -1
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +5 -1
- package/deps/rocksdb/rocksdb/file/file_util.cc +3 -3
- package/deps/rocksdb/rocksdb/file/file_util.h +2 -0
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +89 -0
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +22 -7
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -2
- package/deps/rocksdb/rocksdb/file/readahead_raf.cc +1 -1
- package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +154 -74
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +27 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +107 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +19 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +7 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +137 -152
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +61 -26
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +30 -26
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +33 -16
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +87 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +0 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +9 -2
- package/deps/rocksdb/rocksdb/logging/env_logger.h +2 -0
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +78 -42
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +14 -9
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +1 -0
- package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -0
- package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +4 -9
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +19 -11
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +1 -1
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +211 -555
- package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +1 -1
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +36 -2
- package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +17 -7
- package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.h +10 -7
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +19 -18
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util.h +10 -2
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +14 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +35 -2
- package/deps/rocksdb/rocksdb/options/cf_options.h +5 -0
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +1 -1
- package/deps/rocksdb/rocksdb/options/options.cc +12 -53
- package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -0
- package/deps/rocksdb/rocksdb/options/options_parser.cc +11 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +32 -4
- package/deps/rocksdb/rocksdb/options/options_test.cc +89 -5
- package/deps/rocksdb/rocksdb/port/lang.h +27 -0
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +67 -24
- package/deps/rocksdb/rocksdb/src.mk +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -3
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +195 -35
- package/deps/rocksdb/rocksdb/table/block_based/block.h +197 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +71 -51
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +7 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +4 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +3 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +43 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +36 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +266 -166
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +44 -14
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +63 -56
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +8 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +10 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +14 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +918 -2
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -2
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +10 -9
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -8
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +2 -2
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +18 -23
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +8 -8
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +16 -32
- package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +7 -8
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +4 -5
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +3 -3
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +46 -53
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +12 -12
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +7 -9
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +26 -23
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +3 -0
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +3 -2
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +7 -1
- package/deps/rocksdb/rocksdb/table/block_fetcher.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +3 -2
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +5 -2
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +4 -2
- package/deps/rocksdb/rocksdb/table/format.cc +4 -4
- package/deps/rocksdb/rocksdb/table/format.h +1 -1
- package/deps/rocksdb/rocksdb/table/get_context.cc +1 -1
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +33 -22
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +4 -0
- package/deps/rocksdb/rocksdb/table/mock_table.cc +4 -2
- package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +1 -1
- package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +1 -1
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +18 -10
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +4 -3
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +10 -7
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +4 -2
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +11 -0
- package/deps/rocksdb/rocksdb/table/table_builder.h +14 -5
- package/deps/rocksdb/rocksdb/table/table_properties.cc +2 -0
- package/deps/rocksdb/rocksdb/table/table_reader.h +6 -3
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +1 -1
- package/deps/rocksdb/rocksdb/table/table_test.cc +291 -34
- package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +3 -1
- package/deps/rocksdb/rocksdb/test_util/testharness.h +5 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -2
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +33 -17
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +3 -1
- package/deps/rocksdb/rocksdb/util/bloom_impl.h +2 -2
- package/deps/rocksdb/rocksdb/util/compression.h +1 -1
- package/deps/rocksdb/rocksdb/util/crc32c.cc +24 -83
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +7 -9
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +4 -1
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +1 -1
- package/deps/rocksdb/rocksdb/util/gflags_compat.h +9 -10
- package/deps/rocksdb/rocksdb/util/math.h +12 -7
- package/deps/rocksdb/rocksdb/util/rate_limiter.cc +16 -18
- package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +46 -2
- package/deps/rocksdb/rocksdb/util/ribbon_test.cc +6 -6
- package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +12 -7
- package/deps/rocksdb/rocksdb/util/stop_watch.h +31 -13
- package/deps/rocksdb/rocksdb/util/thread_list_test.cc +2 -0
- package/deps/rocksdb/rocksdb/util/thread_operation.h +2 -1
- package/deps/rocksdb/rocksdb/util/udt_util.h +77 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +11 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +34 -1
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +15 -0
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +5 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +29 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +6 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +10 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +6 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +5 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +5 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
- /package/deps/rocksdb/rocksdb/memory/{memory_allocator.h → memory_allocator_impl.h} +0 -0
- /package/deps/rocksdb/rocksdb/monitoring/{statistics.h → statistics_impl.h} +0 -0
- /package/deps/rocksdb/rocksdb/table/block_based/{flush_block_policy.h → flush_block_policy_impl.h} +0 -0
- /package/deps/rocksdb/rocksdb/util/{rate_limiter.h → rate_limiter_impl.h} +0 -0
- /package/deps/rocksdb/rocksdb/utilities/agg_merge/{agg_merge.h → agg_merge_impl.h} +0 -0
|
@@ -37,6 +37,7 @@
|
|
|
37
37
|
#include "db/pinned_iterators_manager.h"
|
|
38
38
|
#include "db/table_cache.h"
|
|
39
39
|
#include "db/version_builder.h"
|
|
40
|
+
#include "db/version_edit.h"
|
|
40
41
|
#include "db/version_edit_handler.h"
|
|
41
42
|
#include "table/compaction_merging_iterator.h"
|
|
42
43
|
|
|
@@ -941,7 +942,7 @@ class LevelIterator final : public InternalIterator {
|
|
|
941
942
|
const std::shared_ptr<const SliceTransform>& prefix_extractor,
|
|
942
943
|
bool should_sample, HistogramImpl* file_read_hist,
|
|
943
944
|
TableReaderCaller caller, bool skip_filters, int level,
|
|
944
|
-
RangeDelAggregator* range_del_agg,
|
|
945
|
+
uint8_t block_protection_bytes_per_key, RangeDelAggregator* range_del_agg,
|
|
945
946
|
const std::vector<AtomicCompactionUnitBoundary>* compaction_boundaries =
|
|
946
947
|
nullptr,
|
|
947
948
|
bool allow_unprepared_value = false,
|
|
@@ -964,6 +965,7 @@ class LevelIterator final : public InternalIterator {
|
|
|
964
965
|
pinned_iters_mgr_(nullptr),
|
|
965
966
|
compaction_boundaries_(compaction_boundaries),
|
|
966
967
|
is_next_read_sequential_(false),
|
|
968
|
+
block_protection_bytes_per_key_(block_protection_bytes_per_key),
|
|
967
969
|
range_tombstone_iter_(nullptr),
|
|
968
970
|
to_return_sentinel_(false) {
|
|
969
971
|
// Empty level is not supported.
|
|
@@ -1107,7 +1109,8 @@ class LevelIterator final : public InternalIterator {
|
|
|
1107
1109
|
nullptr /* don't need reference to table */, file_read_hist_, caller_,
|
|
1108
1110
|
/*arena=*/nullptr, skip_filters_, level_,
|
|
1109
1111
|
/*max_file_size_for_l0_meta_pin=*/0, smallest_compaction_key,
|
|
1110
|
-
largest_compaction_key, allow_unprepared_value_,
|
|
1112
|
+
largest_compaction_key, allow_unprepared_value_,
|
|
1113
|
+
block_protection_bytes_per_key_, range_tombstone_iter_);
|
|
1111
1114
|
}
|
|
1112
1115
|
|
|
1113
1116
|
// Check if current file being fully within iterate_lower_bound.
|
|
@@ -1154,6 +1157,8 @@ class LevelIterator final : public InternalIterator {
|
|
|
1154
1157
|
|
|
1155
1158
|
bool is_next_read_sequential_;
|
|
1156
1159
|
|
|
1160
|
+
uint8_t block_protection_bytes_per_key_;
|
|
1161
|
+
|
|
1157
1162
|
// This is set when this level iterator is used under a merging iterator
|
|
1158
1163
|
// that processes range tombstones. range_tombstone_iter_ points to where the
|
|
1159
1164
|
// merging iterator stores the range tombstones iterator for this level. When
|
|
@@ -1527,13 +1532,15 @@ void LevelIterator::InitFileIterator(size_t new_file_index) {
|
|
|
1527
1532
|
}
|
|
1528
1533
|
} // anonymous namespace
|
|
1529
1534
|
|
|
1530
|
-
Status Version::GetTableProperties(
|
|
1535
|
+
Status Version::GetTableProperties(const ReadOptions& read_options,
|
|
1536
|
+
std::shared_ptr<const TableProperties>* tp,
|
|
1531
1537
|
const FileMetaData* file_meta,
|
|
1532
1538
|
const std::string* fname) const {
|
|
1533
1539
|
auto table_cache = cfd_->table_cache();
|
|
1534
1540
|
auto ioptions = cfd_->ioptions();
|
|
1535
1541
|
Status s = table_cache->GetTableProperties(
|
|
1536
|
-
file_options_, cfd_->internal_comparator(), *file_meta, tp,
|
|
1542
|
+
file_options_, read_options, cfd_->internal_comparator(), *file_meta, tp,
|
|
1543
|
+
mutable_cf_options_.block_protection_bytes_per_key,
|
|
1537
1544
|
mutable_cf_options_.prefix_extractor, true /* no io */);
|
|
1538
1545
|
if (s.ok()) {
|
|
1539
1546
|
return s;
|
|
@@ -1565,14 +1572,16 @@ Status Version::GetTableProperties(std::shared_ptr<const TableProperties>* tp,
|
|
|
1565
1572
|
// the magic number check in the footer.
|
|
1566
1573
|
std::unique_ptr<RandomAccessFileReader> file_reader(
|
|
1567
1574
|
new RandomAccessFileReader(
|
|
1568
|
-
std::move(file), file_name,
|
|
1569
|
-
|
|
1570
|
-
|
|
1575
|
+
std::move(file), file_name, ioptions->clock /* clock */, io_tracer_,
|
|
1576
|
+
ioptions->stats /* stats */,
|
|
1577
|
+
Histograms::SST_READ_MICROS /* hist_type */,
|
|
1578
|
+
nullptr /* file_read_hist */, nullptr /* rate_limiter */,
|
|
1579
|
+
ioptions->listeners));
|
|
1571
1580
|
std::unique_ptr<TableProperties> props;
|
|
1572
1581
|
s = ReadTableProperties(
|
|
1573
1582
|
file_reader.get(), file_meta->fd.GetFileSize(),
|
|
1574
1583
|
Footer::kNullTableMagicNumber /* table's magic number */, *ioptions,
|
|
1575
|
-
&props);
|
|
1584
|
+
read_options, &props);
|
|
1576
1585
|
if (!s.ok()) {
|
|
1577
1586
|
return s;
|
|
1578
1587
|
}
|
|
@@ -1581,10 +1590,11 @@ Status Version::GetTableProperties(std::shared_ptr<const TableProperties>* tp,
|
|
|
1581
1590
|
return s;
|
|
1582
1591
|
}
|
|
1583
1592
|
|
|
1584
|
-
Status Version::GetPropertiesOfAllTables(
|
|
1593
|
+
Status Version::GetPropertiesOfAllTables(const ReadOptions& read_options,
|
|
1594
|
+
TablePropertiesCollection* props) {
|
|
1585
1595
|
Status s;
|
|
1586
1596
|
for (int level = 0; level < storage_info_.num_levels_; level++) {
|
|
1587
|
-
s = GetPropertiesOfAllTables(props, level);
|
|
1597
|
+
s = GetPropertiesOfAllTables(read_options, props, level);
|
|
1588
1598
|
if (!s.ok()) {
|
|
1589
1599
|
return s;
|
|
1590
1600
|
}
|
|
@@ -1602,6 +1612,8 @@ Status Version::TablesRangeTombstoneSummary(int max_entries_to_print,
|
|
|
1602
1612
|
|
|
1603
1613
|
std::stringstream ss;
|
|
1604
1614
|
|
|
1615
|
+
// TODO: plumb Env::IOActivity
|
|
1616
|
+
const ReadOptions read_options;
|
|
1605
1617
|
for (int level = 0; level < storage_info_.num_levels_; level++) {
|
|
1606
1618
|
for (const auto& file_meta : storage_info_.files_[level]) {
|
|
1607
1619
|
auto fname =
|
|
@@ -1614,7 +1626,8 @@ Status Version::TablesRangeTombstoneSummary(int max_entries_to_print,
|
|
|
1614
1626
|
std::unique_ptr<FragmentedRangeTombstoneIterator> tombstone_iter;
|
|
1615
1627
|
|
|
1616
1628
|
Status s = table_cache->GetRangeTombstoneIterator(
|
|
1617
|
-
|
|
1629
|
+
read_options, cfd_->internal_comparator(), *file_meta,
|
|
1630
|
+
cfd_->GetLatestMutableCFOptions()->block_protection_bytes_per_key,
|
|
1618
1631
|
&tombstone_iter);
|
|
1619
1632
|
if (!s.ok()) {
|
|
1620
1633
|
return s;
|
|
@@ -1648,7 +1661,8 @@ Status Version::TablesRangeTombstoneSummary(int max_entries_to_print,
|
|
|
1648
1661
|
return Status::OK();
|
|
1649
1662
|
}
|
|
1650
1663
|
|
|
1651
|
-
Status Version::GetPropertiesOfAllTables(
|
|
1664
|
+
Status Version::GetPropertiesOfAllTables(const ReadOptions& read_options,
|
|
1665
|
+
TablePropertiesCollection* props,
|
|
1652
1666
|
int level) {
|
|
1653
1667
|
for (const auto& file_meta : storage_info_.files_[level]) {
|
|
1654
1668
|
auto fname =
|
|
@@ -1657,7 +1671,8 @@ Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props,
|
|
|
1657
1671
|
// 1. If the table is already present in table cache, load table
|
|
1658
1672
|
// properties from there.
|
|
1659
1673
|
std::shared_ptr<const TableProperties> table_properties;
|
|
1660
|
-
Status s =
|
|
1674
|
+
Status s =
|
|
1675
|
+
GetTableProperties(read_options, &table_properties, file_meta, &fname);
|
|
1661
1676
|
if (s.ok()) {
|
|
1662
1677
|
props->insert({fname, table_properties});
|
|
1663
1678
|
} else {
|
|
@@ -1669,7 +1684,8 @@ Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props,
|
|
|
1669
1684
|
}
|
|
1670
1685
|
|
|
1671
1686
|
Status Version::GetPropertiesOfTablesInRange(
|
|
1672
|
-
const Range* range, std::size_t n,
|
|
1687
|
+
const ReadOptions& read_options, const Range* range, std::size_t n,
|
|
1688
|
+
TablePropertiesCollection* props) const {
|
|
1673
1689
|
for (int level = 0; level < storage_info_.num_non_empty_levels(); level++) {
|
|
1674
1690
|
for (decltype(n) i = 0; i < n; i++) {
|
|
1675
1691
|
// Convert user_key into a corresponding internal key.
|
|
@@ -1686,7 +1702,8 @@ Status Version::GetPropertiesOfTablesInRange(
|
|
|
1686
1702
|
// 1. If the table is already present in table cache, load table
|
|
1687
1703
|
// properties from there.
|
|
1688
1704
|
std::shared_ptr<const TableProperties> table_properties;
|
|
1689
|
-
Status s = GetTableProperties(&table_properties,
|
|
1705
|
+
Status s = GetTableProperties(read_options, &table_properties,
|
|
1706
|
+
file_meta, &fname);
|
|
1690
1707
|
if (s.ok()) {
|
|
1691
1708
|
props->insert({fname, table_properties});
|
|
1692
1709
|
} else {
|
|
@@ -1701,13 +1718,14 @@ Status Version::GetPropertiesOfTablesInRange(
|
|
|
1701
1718
|
}
|
|
1702
1719
|
|
|
1703
1720
|
Status Version::GetAggregatedTableProperties(
|
|
1704
|
-
std::shared_ptr<const TableProperties>* tp,
|
|
1721
|
+
const ReadOptions& read_options, std::shared_ptr<const TableProperties>* tp,
|
|
1722
|
+
int level) {
|
|
1705
1723
|
TablePropertiesCollection props;
|
|
1706
1724
|
Status s;
|
|
1707
1725
|
if (level < 0) {
|
|
1708
|
-
s = GetPropertiesOfAllTables(&props);
|
|
1726
|
+
s = GetPropertiesOfAllTables(read_options, &props);
|
|
1709
1727
|
} else {
|
|
1710
|
-
s = GetPropertiesOfAllTables(&props, level);
|
|
1728
|
+
s = GetPropertiesOfAllTables(read_options, &props, level);
|
|
1711
1729
|
}
|
|
1712
1730
|
if (!s.ok()) {
|
|
1713
1731
|
return s;
|
|
@@ -1721,13 +1739,14 @@ Status Version::GetAggregatedTableProperties(
|
|
|
1721
1739
|
return Status::OK();
|
|
1722
1740
|
}
|
|
1723
1741
|
|
|
1724
|
-
size_t Version::GetMemoryUsageByTableReaders() {
|
|
1742
|
+
size_t Version::GetMemoryUsageByTableReaders(const ReadOptions& read_options) {
|
|
1725
1743
|
size_t total_usage = 0;
|
|
1726
1744
|
for (auto& file_level : storage_info_.level_files_brief_) {
|
|
1727
1745
|
for (size_t i = 0; i < file_level.num_files; i++) {
|
|
1728
1746
|
total_usage += cfd_->table_cache()->GetMemoryUsageByTableReader(
|
|
1729
|
-
file_options_, cfd_->internal_comparator(),
|
|
1747
|
+
file_options_, read_options, cfd_->internal_comparator(),
|
|
1730
1748
|
*file_level.files[i].file_metadata,
|
|
1749
|
+
mutable_cf_options_.block_protection_bytes_per_key,
|
|
1731
1750
|
mutable_cf_options_.prefix_extractor);
|
|
1732
1751
|
}
|
|
1733
1752
|
}
|
|
@@ -1807,6 +1826,49 @@ uint64_t Version::GetSstFilesSize() {
|
|
|
1807
1826
|
return sst_files_size;
|
|
1808
1827
|
}
|
|
1809
1828
|
|
|
1829
|
+
void Version::GetSstFilesBoundaryKeys(Slice* smallest_user_key,
|
|
1830
|
+
Slice* largest_user_key) {
|
|
1831
|
+
smallest_user_key->clear();
|
|
1832
|
+
largest_user_key->clear();
|
|
1833
|
+
bool initialized = false;
|
|
1834
|
+
const Comparator* ucmp = storage_info_.user_comparator_;
|
|
1835
|
+
for (int level = 0; level < cfd_->NumberLevels(); level++) {
|
|
1836
|
+
if (storage_info_.LevelFiles(level).size() == 0) {
|
|
1837
|
+
continue;
|
|
1838
|
+
}
|
|
1839
|
+
if (level == 0) {
|
|
1840
|
+
// we need to consider all files on level 0
|
|
1841
|
+
for (const auto& file : storage_info_.LevelFiles(level)) {
|
|
1842
|
+
const Slice& start_user_key = file->smallest.user_key();
|
|
1843
|
+
if (!initialized ||
|
|
1844
|
+
ucmp->Compare(start_user_key, *smallest_user_key) < 0) {
|
|
1845
|
+
*smallest_user_key = start_user_key;
|
|
1846
|
+
}
|
|
1847
|
+
const Slice& end_user_key = file->largest.user_key();
|
|
1848
|
+
if (!initialized ||
|
|
1849
|
+
ucmp->Compare(end_user_key, *largest_user_key) > 0) {
|
|
1850
|
+
*largest_user_key = end_user_key;
|
|
1851
|
+
}
|
|
1852
|
+
initialized = true;
|
|
1853
|
+
}
|
|
1854
|
+
} else {
|
|
1855
|
+
// we only need to consider the first and last file
|
|
1856
|
+
const Slice& start_user_key =
|
|
1857
|
+
storage_info_.LevelFiles(level)[0]->smallest.user_key();
|
|
1858
|
+
if (!initialized ||
|
|
1859
|
+
ucmp->Compare(start_user_key, *smallest_user_key) < 0) {
|
|
1860
|
+
*smallest_user_key = start_user_key;
|
|
1861
|
+
}
|
|
1862
|
+
const Slice& end_user_key =
|
|
1863
|
+
storage_info_.LevelFiles(level).back()->largest.user_key();
|
|
1864
|
+
if (!initialized || ucmp->Compare(end_user_key, *largest_user_key) > 0) {
|
|
1865
|
+
*largest_user_key = end_user_key;
|
|
1866
|
+
}
|
|
1867
|
+
initialized = true;
|
|
1868
|
+
}
|
|
1869
|
+
}
|
|
1870
|
+
}
|
|
1871
|
+
|
|
1810
1872
|
void Version::GetCreationTimeOfOldestFile(uint64_t* creation_time) {
|
|
1811
1873
|
uint64_t oldest_time = std::numeric_limits<uint64_t>::max();
|
|
1812
1874
|
for (int level = 0; level < storage_info_.num_non_empty_levels_; level++) {
|
|
@@ -1837,6 +1899,7 @@ InternalIterator* Version::TEST_GetLevelIterator(
|
|
|
1837
1899
|
mutable_cf_options_.prefix_extractor, should_sample_file_read(),
|
|
1838
1900
|
cfd_->internal_stats()->GetFileReadHist(level),
|
|
1839
1901
|
TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
|
|
1902
|
+
mutable_cf_options_.block_protection_bytes_per_key,
|
|
1840
1903
|
nullptr /* range_del_agg */, nullptr /* compaction_boundaries */,
|
|
1841
1904
|
allow_unprepared_value, &tombstone_iter_ptr);
|
|
1842
1905
|
if (read_options.ignore_range_deletions) {
|
|
@@ -1935,7 +1998,7 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
|
|
|
1935
1998
|
/*skip_filters=*/false, /*level=*/0, max_file_size_for_l0_meta_pin_,
|
|
1936
1999
|
/*smallest_compaction_key=*/nullptr,
|
|
1937
2000
|
/*largest_compaction_key=*/nullptr, allow_unprepared_value,
|
|
1938
|
-
&tombstone_iter);
|
|
2001
|
+
mutable_cf_options_.block_protection_bytes_per_key, &tombstone_iter);
|
|
1939
2002
|
if (read_options.ignore_range_deletions) {
|
|
1940
2003
|
merge_iter_builder->AddIterator(table_iter);
|
|
1941
2004
|
} else {
|
|
@@ -1964,8 +2027,10 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
|
|
|
1964
2027
|
mutable_cf_options_.prefix_extractor, should_sample_file_read(),
|
|
1965
2028
|
cfd_->internal_stats()->GetFileReadHist(level),
|
|
1966
2029
|
TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
|
|
1967
|
-
|
|
1968
|
-
|
|
2030
|
+
mutable_cf_options_.block_protection_bytes_per_key,
|
|
2031
|
+
/*range_del_agg=*/nullptr,
|
|
2032
|
+
/*compaction_boundaries=*/nullptr, allow_unprepared_value,
|
|
2033
|
+
&tombstone_iter_ptr);
|
|
1969
2034
|
if (read_options.ignore_range_deletions) {
|
|
1970
2035
|
merge_iter_builder->AddIterator(level_iter);
|
|
1971
2036
|
} else {
|
|
@@ -2008,7 +2073,8 @@ Status Version::OverlapWithLevelIterator(const ReadOptions& read_options,
|
|
|
2008
2073
|
/*skip_filters=*/false, /*level=*/0, max_file_size_for_l0_meta_pin_,
|
|
2009
2074
|
/*smallest_compaction_key=*/nullptr,
|
|
2010
2075
|
/*largest_compaction_key=*/nullptr,
|
|
2011
|
-
/*allow_unprepared_value=*/false
|
|
2076
|
+
/*allow_unprepared_value=*/false,
|
|
2077
|
+
mutable_cf_options_.block_protection_bytes_per_key));
|
|
2012
2078
|
status = OverlapWithIterator(ucmp, smallest_user_key, largest_user_key,
|
|
2013
2079
|
iter.get(), overlap);
|
|
2014
2080
|
if (!status.ok() || *overlap) {
|
|
@@ -2023,7 +2089,8 @@ Status Version::OverlapWithLevelIterator(const ReadOptions& read_options,
|
|
|
2023
2089
|
mutable_cf_options_.prefix_extractor, should_sample_file_read(),
|
|
2024
2090
|
cfd_->internal_stats()->GetFileReadHist(level),
|
|
2025
2091
|
TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
|
|
2026
|
-
&range_del_agg
|
|
2092
|
+
mutable_cf_options_.block_protection_bytes_per_key, &range_del_agg,
|
|
2093
|
+
nullptr, false));
|
|
2027
2094
|
status = OverlapWithIterator(ucmp, smallest_user_key, largest_user_key,
|
|
2028
2095
|
iter.get(), overlap);
|
|
2029
2096
|
}
|
|
@@ -2050,6 +2117,7 @@ VersionStorageInfo::VersionStorageInfo(
|
|
|
2050
2117
|
compaction_style_(compaction_style),
|
|
2051
2118
|
files_(new std::vector<FileMetaData*>[num_levels_]),
|
|
2052
2119
|
base_level_(num_levels_ == 1 ? -1 : 1),
|
|
2120
|
+
lowest_unnecessary_level_(-1),
|
|
2053
2121
|
level_multiplier_(0.0),
|
|
2054
2122
|
files_by_compaction_pri_(num_levels_),
|
|
2055
2123
|
level0_non_overlapping_(false),
|
|
@@ -2321,7 +2389,8 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
|
|
|
2321
2389
|
StopWatchNano timer(clock_, timer_enabled /* auto_start */);
|
|
2322
2390
|
*status = table_cache_->Get(
|
|
2323
2391
|
read_options, *internal_comparator(), *f->file_metadata, ikey,
|
|
2324
|
-
&get_context, mutable_cf_options_.
|
|
2392
|
+
&get_context, mutable_cf_options_.block_protection_bytes_per_key,
|
|
2393
|
+
mutable_cf_options_.prefix_extractor,
|
|
2325
2394
|
cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
|
|
2326
2395
|
IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel()),
|
|
2327
2396
|
fp.IsHitFileLastInLevel()),
|
|
@@ -2566,7 +2635,8 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
|
|
|
2566
2635
|
read_options, *internal_comparator(), *f->file_metadata,
|
|
2567
2636
|
mutable_cf_options_.prefix_extractor,
|
|
2568
2637
|
cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
|
|
2569
|
-
fp.GetHitFileLevel(), &file_range, &table_handle
|
|
2638
|
+
fp.GetHitFileLevel(), &file_range, &table_handle,
|
|
2639
|
+
mutable_cf_options_.block_protection_bytes_per_key);
|
|
2570
2640
|
skip_range_deletions = true;
|
|
2571
2641
|
if (status.ok()) {
|
|
2572
2642
|
skip_filters = true;
|
|
@@ -2756,7 +2826,8 @@ Status Version::ProcessBatch(
|
|
|
2756
2826
|
read_options, *internal_comparator(), *f->file_metadata,
|
|
2757
2827
|
mutable_cf_options_.prefix_extractor,
|
|
2758
2828
|
cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
|
|
2759
|
-
fp.GetHitFileLevel(), &file_range, &table_handle
|
|
2829
|
+
fp.GetHitFileLevel(), &file_range, &table_handle,
|
|
2830
|
+
mutable_cf_options_.block_protection_bytes_per_key);
|
|
2760
2831
|
if (status.ok()) {
|
|
2761
2832
|
skip_filters = true;
|
|
2762
2833
|
skip_range_deletions = true;
|
|
@@ -2983,24 +3054,26 @@ void VersionStorageInfo::PrepareForVersionAppend(
|
|
|
2983
3054
|
}
|
|
2984
3055
|
|
|
2985
3056
|
void Version::PrepareAppend(const MutableCFOptions& mutable_cf_options,
|
|
3057
|
+
const ReadOptions& read_options,
|
|
2986
3058
|
bool update_stats) {
|
|
2987
3059
|
TEST_SYNC_POINT_CALLBACK(
|
|
2988
3060
|
"Version::PrepareAppend:forced_check",
|
|
2989
3061
|
reinterpret_cast<void*>(&storage_info_.force_consistency_checks_));
|
|
2990
3062
|
|
|
2991
3063
|
if (update_stats) {
|
|
2992
|
-
UpdateAccumulatedStats();
|
|
3064
|
+
UpdateAccumulatedStats(read_options);
|
|
2993
3065
|
}
|
|
2994
3066
|
|
|
2995
3067
|
storage_info_.PrepareForVersionAppend(*cfd_->ioptions(), mutable_cf_options);
|
|
2996
3068
|
}
|
|
2997
3069
|
|
|
2998
|
-
bool Version::MaybeInitializeFileMetaData(
|
|
3070
|
+
bool Version::MaybeInitializeFileMetaData(const ReadOptions& read_options,
|
|
3071
|
+
FileMetaData* file_meta) {
|
|
2999
3072
|
if (file_meta->init_stats_from_file || file_meta->compensated_file_size > 0) {
|
|
3000
3073
|
return false;
|
|
3001
3074
|
}
|
|
3002
3075
|
std::shared_ptr<const TableProperties> tp;
|
|
3003
|
-
Status s = GetTableProperties(&tp, file_meta);
|
|
3076
|
+
Status s = GetTableProperties(read_options, &tp, file_meta);
|
|
3004
3077
|
file_meta->init_stats_from_file = true;
|
|
3005
3078
|
if (!s.ok()) {
|
|
3006
3079
|
ROCKS_LOG_ERROR(vset_->db_options_->info_log,
|
|
@@ -3045,7 +3118,7 @@ void VersionStorageInfo::RemoveCurrentStats(FileMetaData* file_meta) {
|
|
|
3045
3118
|
}
|
|
3046
3119
|
}
|
|
3047
3120
|
|
|
3048
|
-
void Version::UpdateAccumulatedStats() {
|
|
3121
|
+
void Version::UpdateAccumulatedStats(const ReadOptions& read_options) {
|
|
3049
3122
|
// maximum number of table properties loaded from files.
|
|
3050
3123
|
const int kMaxInitCount = 20;
|
|
3051
3124
|
int init_count = 0;
|
|
@@ -3063,7 +3136,7 @@ void Version::UpdateAccumulatedStats() {
|
|
|
3063
3136
|
level < storage_info_.num_levels_ && init_count < kMaxInitCount;
|
|
3064
3137
|
++level) {
|
|
3065
3138
|
for (auto* file_meta : storage_info_.files_[level]) {
|
|
3066
|
-
if (MaybeInitializeFileMetaData(file_meta)) {
|
|
3139
|
+
if (MaybeInitializeFileMetaData(read_options, file_meta)) {
|
|
3067
3140
|
// each FileMeta will be initialized only once.
|
|
3068
3141
|
storage_info_.UpdateAccumulatedStats(file_meta);
|
|
3069
3142
|
// when option "max_open_files" is -1, all the file metadata has
|
|
@@ -3088,7 +3161,8 @@ void Version::UpdateAccumulatedStats() {
|
|
|
3088
3161
|
storage_info_.accumulated_raw_value_size_ == 0 && level >= 0; --level) {
|
|
3089
3162
|
for (int i = static_cast<int>(storage_info_.files_[level].size()) - 1;
|
|
3090
3163
|
storage_info_.accumulated_raw_value_size_ == 0 && i >= 0; --i) {
|
|
3091
|
-
if (MaybeInitializeFileMetaData(
|
|
3164
|
+
if (MaybeInitializeFileMetaData(read_options,
|
|
3165
|
+
storage_info_.files_[level][i])) {
|
|
3092
3166
|
storage_info_.UpdateAccumulatedStats(storage_info_.files_[level][i]);
|
|
3093
3167
|
}
|
|
3094
3168
|
}
|
|
@@ -3252,6 +3326,55 @@ uint32_t GetExpiredTtlFilesCount(const ImmutableOptions& ioptions,
|
|
|
3252
3326
|
}
|
|
3253
3327
|
return ttl_expired_files_count;
|
|
3254
3328
|
}
|
|
3329
|
+
|
|
3330
|
+
bool ShouldChangeFileTemperature(const ImmutableOptions& ioptions,
|
|
3331
|
+
const MutableCFOptions& mutable_cf_options,
|
|
3332
|
+
const std::vector<FileMetaData*>& files) {
|
|
3333
|
+
const std::vector<FileTemperatureAge>& ages =
|
|
3334
|
+
mutable_cf_options.compaction_options_fifo
|
|
3335
|
+
.file_temperature_age_thresholds;
|
|
3336
|
+
if (ages.empty()) {
|
|
3337
|
+
return false;
|
|
3338
|
+
}
|
|
3339
|
+
if (files.empty()) {
|
|
3340
|
+
return false;
|
|
3341
|
+
}
|
|
3342
|
+
int64_t _current_time;
|
|
3343
|
+
auto status = ioptions.clock->GetCurrentTime(&_current_time);
|
|
3344
|
+
const uint64_t current_time = static_cast<uint64_t>(_current_time);
|
|
3345
|
+
// We use oldest_ancestor_time of a file to be the estimate age of
|
|
3346
|
+
// the file just older than it. This is the same logic used in
|
|
3347
|
+
// FIFOCompactionPicker::PickTemperatureChangeCompaction().
|
|
3348
|
+
if (status.ok() && current_time >= ages[0].age) {
|
|
3349
|
+
uint64_t create_time_threshold = current_time - ages[0].age;
|
|
3350
|
+
Temperature target_temp;
|
|
3351
|
+
assert(files.size() >= 1);
|
|
3352
|
+
for (size_t index = files.size() - 1; index >= 1; --index) {
|
|
3353
|
+
FileMetaData* cur_file = files[index];
|
|
3354
|
+
FileMetaData* prev_file = files[index - 1];
|
|
3355
|
+
if (!cur_file->being_compacted) {
|
|
3356
|
+
uint64_t oldest_ancestor_time = prev_file->TryGetOldestAncesterTime();
|
|
3357
|
+
if (oldest_ancestor_time == kUnknownOldestAncesterTime) {
|
|
3358
|
+
return false;
|
|
3359
|
+
}
|
|
3360
|
+
if (oldest_ancestor_time > create_time_threshold) {
|
|
3361
|
+
return false;
|
|
3362
|
+
}
|
|
3363
|
+
target_temp = ages[0].temperature;
|
|
3364
|
+
for (size_t i = 1; i < ages.size(); ++i) {
|
|
3365
|
+
if (current_time >= ages[i].age &&
|
|
3366
|
+
oldest_ancestor_time <= current_time - ages[i].age) {
|
|
3367
|
+
target_temp = ages[i].temperature;
|
|
3368
|
+
}
|
|
3369
|
+
}
|
|
3370
|
+
if (cur_file->temperature != target_temp) {
|
|
3371
|
+
return true;
|
|
3372
|
+
}
|
|
3373
|
+
}
|
|
3374
|
+
}
|
|
3375
|
+
}
|
|
3376
|
+
return false;
|
|
3377
|
+
}
|
|
3255
3378
|
} // anonymous namespace
|
|
3256
3379
|
|
|
3257
3380
|
void VersionStorageInfo::ComputeCompactionScore(
|
|
@@ -3262,7 +3385,7 @@ void VersionStorageInfo::ComputeCompactionScore(
|
|
|
3262
3385
|
// the level's target size, and 1.0 is the threshold for triggering
|
|
3263
3386
|
// compaction. Higher score means higher prioritization.
|
|
3264
3387
|
// Now we keep the compaction triggering condition, but consider more
|
|
3265
|
-
// factors for
|
|
3388
|
+
// factors for prioritization, while still keeping the 1.0 threshold.
|
|
3266
3389
|
// In order to provide flexibility for reducing score while still
|
|
3267
3390
|
// maintaining it to be over 1.0, we scale the original score by 10x
|
|
3268
3391
|
// if it is larger than 1.0.
|
|
@@ -3295,7 +3418,7 @@ void VersionStorageInfo::ComputeCompactionScore(
|
|
|
3295
3418
|
// compaction score for the whole DB. Adding other levels as if
|
|
3296
3419
|
// they are L0 files.
|
|
3297
3420
|
for (int i = 1; i < num_levels(); i++) {
|
|
3298
|
-
//
|
|
3421
|
+
// It's possible that a subset of the files in a level may be in a
|
|
3299
3422
|
// compaction, due to delete triggered compaction or trivial move.
|
|
3300
3423
|
// In that case, the below check may not catch a level being
|
|
3301
3424
|
// compacted as it only checks the first file. The worst that can
|
|
@@ -3309,22 +3432,25 @@ void VersionStorageInfo::ComputeCompactionScore(
|
|
|
3309
3432
|
if (compaction_style_ == kCompactionStyleFIFO) {
|
|
3310
3433
|
score = static_cast<double>(total_size) /
|
|
3311
3434
|
mutable_cf_options.compaction_options_fifo.max_table_files_size;
|
|
3312
|
-
if (
|
|
3313
|
-
mutable_cf_options.compaction_options_fifo.
|
|
3314
|
-
// Warm tier move can happen at any time. It's too expensive to
|
|
3315
|
-
// check very file's timestamp now. For now, just trigger it
|
|
3316
|
-
// slightly more frequently than FIFO compaction so that this
|
|
3317
|
-
// happens first.
|
|
3435
|
+
if (score < 1 &&
|
|
3436
|
+
mutable_cf_options.compaction_options_fifo.allow_compaction) {
|
|
3318
3437
|
score = std::max(
|
|
3319
3438
|
static_cast<double>(num_sorted_runs) /
|
|
3320
3439
|
mutable_cf_options.level0_file_num_compaction_trigger,
|
|
3321
3440
|
score);
|
|
3322
3441
|
}
|
|
3323
|
-
if (mutable_cf_options.ttl > 0) {
|
|
3324
|
-
score =
|
|
3325
|
-
static_cast<double>(GetExpiredTtlFilesCount(
|
|
3326
|
-
|
|
3327
|
-
|
|
3442
|
+
if (score < 1 && mutable_cf_options.ttl > 0) {
|
|
3443
|
+
score =
|
|
3444
|
+
std::max(static_cast<double>(GetExpiredTtlFilesCount(
|
|
3445
|
+
immutable_options, mutable_cf_options, files_[0])),
|
|
3446
|
+
score);
|
|
3447
|
+
}
|
|
3448
|
+
if (score < 1 &&
|
|
3449
|
+
ShouldChangeFileTemperature(immutable_options, mutable_cf_options,
|
|
3450
|
+
files_[0])) {
|
|
3451
|
+
// For FIFO, just need a large enough score to trigger compaction.
|
|
3452
|
+
const double kScoreForNeedCompaction = 1.1;
|
|
3453
|
+
score = kScoreForNeedCompaction;
|
|
3328
3454
|
}
|
|
3329
3455
|
} else {
|
|
3330
3456
|
score = static_cast<double>(num_sorted_runs) /
|
|
@@ -3344,7 +3470,7 @@ void VersionStorageInfo::ComputeCompactionScore(
|
|
|
3344
3470
|
// When calculating estimated_compaction_needed_bytes, we assume
|
|
3345
3471
|
// L0 is qualified as pending compactions. We will need to make
|
|
3346
3472
|
// sure that it qualifies for compaction.
|
|
3347
|
-
// It might be
|
|
3473
|
+
// It might be guaranteed by logic below anyway, but we are
|
|
3348
3474
|
// explicit here to make sure we don't stop writes with no
|
|
3349
3475
|
// compaction scheduled.
|
|
3350
3476
|
score = std::max(score, 1.01);
|
|
@@ -3373,7 +3499,7 @@ void VersionStorageInfo::ComputeCompactionScore(
|
|
|
3373
3499
|
}
|
|
3374
3500
|
}
|
|
3375
3501
|
}
|
|
3376
|
-
} else {
|
|
3502
|
+
} else { // level > 0
|
|
3377
3503
|
// Compute the ratio of current size to size limit.
|
|
3378
3504
|
uint64_t level_bytes_no_compacting = 0;
|
|
3379
3505
|
uint64_t level_total_bytes = 0;
|
|
@@ -3383,21 +3509,36 @@ void VersionStorageInfo::ComputeCompactionScore(
|
|
|
3383
3509
|
level_bytes_no_compacting += f->compensated_file_size;
|
|
3384
3510
|
}
|
|
3385
3511
|
}
|
|
3386
|
-
if (!immutable_options.level_compaction_dynamic_level_bytes
|
|
3387
|
-
level_bytes_no_compacting < MaxBytesForLevel(level)) {
|
|
3512
|
+
if (!immutable_options.level_compaction_dynamic_level_bytes) {
|
|
3388
3513
|
score = static_cast<double>(level_bytes_no_compacting) /
|
|
3389
3514
|
MaxBytesForLevel(level);
|
|
3390
3515
|
} else {
|
|
3391
|
-
|
|
3392
|
-
|
|
3393
|
-
|
|
3394
|
-
|
|
3395
|
-
|
|
3396
|
-
|
|
3397
|
-
|
|
3398
|
-
|
|
3516
|
+
if (level_bytes_no_compacting < MaxBytesForLevel(level)) {
|
|
3517
|
+
score = static_cast<double>(level_bytes_no_compacting) /
|
|
3518
|
+
MaxBytesForLevel(level);
|
|
3519
|
+
} else {
|
|
3520
|
+
// If there are a large mount of data being compacted down to the
|
|
3521
|
+
// current level soon, we would de-prioritize compaction from
|
|
3522
|
+
// a level where the incoming data would be a large ratio. We do
|
|
3523
|
+
// it by dividing level size not by target level size, but
|
|
3524
|
+
// the target size and the incoming compaction bytes.
|
|
3525
|
+
score = static_cast<double>(level_bytes_no_compacting) /
|
|
3526
|
+
(MaxBytesForLevel(level) + total_downcompact_bytes) *
|
|
3527
|
+
kScoreScale;
|
|
3528
|
+
}
|
|
3529
|
+
// Drain unnecessary levels, but with lower priority compared to
|
|
3530
|
+
// when L0 is eligible. Only non-empty levels can be unnecessary.
|
|
3531
|
+
// If there is no unnecessary levels, lowest_unnecessary_level_ = -1.
|
|
3532
|
+
if (level_bytes_no_compacting > 0 &&
|
|
3533
|
+
level <= lowest_unnecessary_level_) {
|
|
3534
|
+
score = std::max(
|
|
3535
|
+
score, kScoreScale *
|
|
3536
|
+
(1.001 + 0.001 * (lowest_unnecessary_level_ - level)));
|
|
3537
|
+
}
|
|
3399
3538
|
}
|
|
3400
|
-
if (
|
|
3539
|
+
if (level <= lowest_unnecessary_level_) {
|
|
3540
|
+
total_downcompact_bytes += level_total_bytes;
|
|
3541
|
+
} else if (level_total_bytes > MaxBytesForLevel(level)) {
|
|
3401
3542
|
total_downcompact_bytes +=
|
|
3402
3543
|
static_cast<double>(level_total_bytes - MaxBytesForLevel(level));
|
|
3403
3544
|
}
|
|
@@ -4470,6 +4611,7 @@ void VersionStorageInfo::CalculateBaseBytes(const ImmutableOptions& ioptions,
|
|
|
4470
4611
|
}
|
|
4471
4612
|
}
|
|
4472
4613
|
} else {
|
|
4614
|
+
assert(ioptions.compaction_style == kCompactionStyleLevel);
|
|
4473
4615
|
uint64_t max_level_size = 0;
|
|
4474
4616
|
|
|
4475
4617
|
int first_non_empty_level = -1;
|
|
@@ -4494,11 +4636,13 @@ void VersionStorageInfo::CalculateBaseBytes(const ImmutableOptions& ioptions,
|
|
|
4494
4636
|
level_max_bytes_[i] = std::numeric_limits<uint64_t>::max();
|
|
4495
4637
|
}
|
|
4496
4638
|
|
|
4639
|
+
lowest_unnecessary_level_ = -1;
|
|
4497
4640
|
if (max_level_size == 0) {
|
|
4498
4641
|
// No data for L1 and up. L0 compacts to last level directly.
|
|
4499
4642
|
// No compaction from L1+ needs to be scheduled.
|
|
4500
4643
|
base_level_ = num_levels_ - 1;
|
|
4501
4644
|
} else {
|
|
4645
|
+
assert(first_non_empty_level >= 1);
|
|
4502
4646
|
uint64_t base_bytes_max = options.max_bytes_for_level_base;
|
|
4503
4647
|
uint64_t base_bytes_min = static_cast<uint64_t>(
|
|
4504
4648
|
base_bytes_max / options.max_bytes_for_level_multiplier);
|
|
@@ -4509,20 +4653,41 @@ void VersionStorageInfo::CalculateBaseBytes(const ImmutableOptions& ioptions,
|
|
|
4509
4653
|
// Round up after dividing
|
|
4510
4654
|
cur_level_size = static_cast<uint64_t>(
|
|
4511
4655
|
cur_level_size / options.max_bytes_for_level_multiplier);
|
|
4656
|
+
if (lowest_unnecessary_level_ == -1 &&
|
|
4657
|
+
cur_level_size <= base_bytes_min &&
|
|
4658
|
+
(ioptions.preclude_last_level_data_seconds == 0 ||
|
|
4659
|
+
i < num_levels_ - 2)) {
|
|
4660
|
+
// When per_key_placement is enabled, the penultimate level is
|
|
4661
|
+
// necessary.
|
|
4662
|
+
lowest_unnecessary_level_ = i;
|
|
4663
|
+
}
|
|
4512
4664
|
}
|
|
4513
4665
|
|
|
4514
4666
|
// Calculate base level and its size.
|
|
4515
4667
|
uint64_t base_level_size;
|
|
4516
4668
|
if (cur_level_size <= base_bytes_min) {
|
|
4669
|
+
// If per_key_placement is not enabled,
|
|
4670
|
+
// either there is only one non-empty level after level 0,
|
|
4671
|
+
// which can less than base_bytes_min AND necessary,
|
|
4672
|
+
// or there is some unnecessary level.
|
|
4673
|
+
assert(first_non_empty_level == num_levels_ - 1 ||
|
|
4674
|
+
ioptions.preclude_last_level_data_seconds > 0 ||
|
|
4675
|
+
lowest_unnecessary_level_ != -1);
|
|
4517
4676
|
// Case 1. If we make target size of last level to be max_level_size,
|
|
4518
4677
|
// target size of the first non-empty level would be smaller than
|
|
4519
4678
|
// base_bytes_min. We set it be base_bytes_min.
|
|
4520
4679
|
base_level_size = base_bytes_min + 1U;
|
|
4521
4680
|
base_level_ = first_non_empty_level;
|
|
4522
|
-
|
|
4523
|
-
|
|
4524
|
-
|
|
4681
|
+
if (base_level_ < num_levels_ - 1) {
|
|
4682
|
+
ROCKS_LOG_INFO(
|
|
4683
|
+
ioptions.logger,
|
|
4684
|
+
"More existing levels in DB than needed: all non-zero "
|
|
4685
|
+
"levels <= level %d are unnecessary. "
|
|
4686
|
+
"max_bytes_for_level_multiplier may not be guaranteed.",
|
|
4687
|
+
lowest_unnecessary_level_);
|
|
4688
|
+
}
|
|
4525
4689
|
} else {
|
|
4690
|
+
assert(lowest_unnecessary_level_ == -1);
|
|
4526
4691
|
// Find base level (where L0 data is compacted to).
|
|
4527
4692
|
base_level_ = first_non_empty_level;
|
|
4528
4693
|
while (base_level_ > 1 && cur_level_size > base_bytes_max) {
|
|
@@ -4931,7 +5096,8 @@ void VersionSet::AppendVersion(ColumnFamilyData* column_family_data,
|
|
|
4931
5096
|
Status VersionSet::ProcessManifestWrites(
|
|
4932
5097
|
std::deque<ManifestWriter>& writers, InstrumentedMutex* mu,
|
|
4933
5098
|
FSDirectory* dir_contains_current_file, bool new_descriptor_log,
|
|
4934
|
-
const ColumnFamilyOptions* new_cf_options
|
|
5099
|
+
const ColumnFamilyOptions* new_cf_options,
|
|
5100
|
+
const ReadOptions& read_options) {
|
|
4935
5101
|
mu->AssertHeld();
|
|
4936
5102
|
assert(!writers.empty());
|
|
4937
5103
|
ManifestWriter& first_writer = writers.front();
|
|
@@ -5162,7 +5328,8 @@ Status VersionSet::ProcessManifestWrites(
|
|
|
5162
5328
|
true /* prefetch_index_and_filter_in_cache */,
|
|
5163
5329
|
false /* is_initial_load */,
|
|
5164
5330
|
mutable_cf_options_ptrs[i]->prefix_extractor,
|
|
5165
|
-
MaxFileSizeForL0MetaPin(*mutable_cf_options_ptrs[i])
|
|
5331
|
+
MaxFileSizeForL0MetaPin(*mutable_cf_options_ptrs[i]), read_options,
|
|
5332
|
+
mutable_cf_options_ptrs[i]->block_protection_bytes_per_key);
|
|
5166
5333
|
if (!s.ok()) {
|
|
5167
5334
|
if (db_options_->paranoid_checks) {
|
|
5168
5335
|
break;
|
|
@@ -5207,7 +5374,8 @@ Status VersionSet::ProcessManifestWrites(
|
|
|
5207
5374
|
constexpr bool update_stats = true;
|
|
5208
5375
|
|
|
5209
5376
|
for (int i = 0; i < static_cast<int>(versions.size()); ++i) {
|
|
5210
|
-
versions[i]->PrepareAppend(*mutable_cf_options_ptrs[i],
|
|
5377
|
+
versions[i]->PrepareAppend(*mutable_cf_options_ptrs[i], read_options,
|
|
5378
|
+
update_stats);
|
|
5211
5379
|
}
|
|
5212
5380
|
}
|
|
5213
5381
|
|
|
@@ -5319,7 +5487,8 @@ Status VersionSet::ProcessManifestWrites(
|
|
|
5319
5487
|
assert(batch_edits.size() == 1);
|
|
5320
5488
|
assert(new_cf_options != nullptr);
|
|
5321
5489
|
assert(max_last_sequence == descriptor_last_sequence_);
|
|
5322
|
-
CreateColumnFamily(*new_cf_options,
|
|
5490
|
+
CreateColumnFamily(*new_cf_options, read_options,
|
|
5491
|
+
first_writer.edit_list.front());
|
|
5323
5492
|
} else if (first_writer.edit_list.front()->is_column_family_drop_) {
|
|
5324
5493
|
assert(batch_edits.size() == 1);
|
|
5325
5494
|
assert(max_last_sequence == descriptor_last_sequence_);
|
|
@@ -5488,6 +5657,7 @@ void VersionSet::WakeUpWaitingManifestWriters() {
|
|
|
5488
5657
|
Status VersionSet::LogAndApply(
|
|
5489
5658
|
const autovector<ColumnFamilyData*>& column_family_datas,
|
|
5490
5659
|
const autovector<const MutableCFOptions*>& mutable_cf_options_list,
|
|
5660
|
+
const ReadOptions& read_options,
|
|
5491
5661
|
const autovector<autovector<VersionEdit*>>& edit_lists,
|
|
5492
5662
|
InstrumentedMutex* mu, FSDirectory* dir_contains_current_file,
|
|
5493
5663
|
bool new_descriptor_log, const ColumnFamilyOptions* new_cf_options,
|
|
@@ -5565,7 +5735,8 @@ Status VersionSet::LogAndApply(
|
|
|
5565
5735
|
return Status::ColumnFamilyDropped();
|
|
5566
5736
|
}
|
|
5567
5737
|
return ProcessManifestWrites(writers, mu, dir_contains_current_file,
|
|
5568
|
-
new_descriptor_log, new_cf_options
|
|
5738
|
+
new_descriptor_log, new_cf_options,
|
|
5739
|
+
read_options);
|
|
5569
5740
|
}
|
|
5570
5741
|
|
|
5571
5742
|
void VersionSet::LogAndApplyCFHelper(VersionEdit* edit,
|
|
@@ -5649,6 +5820,7 @@ Status VersionSet::GetCurrentManifestPath(const std::string& dbname,
|
|
|
5649
5820
|
Status VersionSet::Recover(
|
|
5650
5821
|
const std::vector<ColumnFamilyDescriptor>& column_families, bool read_only,
|
|
5651
5822
|
std::string* db_id, bool no_error_if_files_missing) {
|
|
5823
|
+
const ReadOptions read_options(Env::IOActivity::kDBOpen);
|
|
5652
5824
|
// Read "CURRENT" file, which contains a pointer to the current manifest
|
|
5653
5825
|
// file
|
|
5654
5826
|
std::string manifest_path;
|
|
@@ -5685,7 +5857,7 @@ Status VersionSet::Recover(
|
|
|
5685
5857
|
VersionEditHandler handler(
|
|
5686
5858
|
read_only, column_families, const_cast<VersionSet*>(this),
|
|
5687
5859
|
/*track_missing_files=*/false, no_error_if_files_missing, io_tracer_,
|
|
5688
|
-
EpochNumberRequirement::kMightMissing);
|
|
5860
|
+
read_options, EpochNumberRequirement::kMightMissing);
|
|
5689
5861
|
handler.Iterate(reader, &log_read_status);
|
|
5690
5862
|
s = handler.status();
|
|
5691
5863
|
if (s.ok()) {
|
|
@@ -5833,6 +6005,7 @@ Status VersionSet::TryRecoverFromOneManifest(
|
|
|
5833
6005
|
const std::string& manifest_path,
|
|
5834
6006
|
const std::vector<ColumnFamilyDescriptor>& column_families, bool read_only,
|
|
5835
6007
|
std::string* db_id, bool* has_missing_table_file) {
|
|
6008
|
+
const ReadOptions read_options(Env::IOActivity::kDBOpen);
|
|
5836
6009
|
ROCKS_LOG_INFO(db_options_->info_log, "Trying to recover from manifest: %s\n",
|
|
5837
6010
|
manifest_path.c_str());
|
|
5838
6011
|
std::unique_ptr<SequentialFileReader> manifest_file_reader;
|
|
@@ -5857,7 +6030,7 @@ Status VersionSet::TryRecoverFromOneManifest(
|
|
|
5857
6030
|
/*checksum=*/true, /*log_num=*/0);
|
|
5858
6031
|
VersionEditHandlerPointInTime handler_pit(
|
|
5859
6032
|
read_only, column_families, const_cast<VersionSet*>(this), io_tracer_,
|
|
5860
|
-
EpochNumberRequirement::kMightMissing);
|
|
6033
|
+
read_options, EpochNumberRequirement::kMightMissing);
|
|
5861
6034
|
|
|
5862
6035
|
handler_pit.Iterate(reader, &s);
|
|
5863
6036
|
|
|
@@ -5900,6 +6073,8 @@ Status VersionSet::ListColumnFamilies(std::vector<std::string>* column_families,
|
|
|
5900
6073
|
Status VersionSet::ListColumnFamiliesFromManifest(
|
|
5901
6074
|
const std::string& manifest_path, FileSystem* fs,
|
|
5902
6075
|
std::vector<std::string>* column_families) {
|
|
6076
|
+
// TODO: plumb Env::IOActivity
|
|
6077
|
+
const ReadOptions read_options;
|
|
5903
6078
|
std::unique_ptr<SequentialFileReader> file_reader;
|
|
5904
6079
|
Status s;
|
|
5905
6080
|
{
|
|
@@ -5919,7 +6094,7 @@ Status VersionSet::ListColumnFamiliesFromManifest(
|
|
|
5919
6094
|
log::Reader reader(nullptr, std::move(file_reader), &reporter,
|
|
5920
6095
|
true /* checksum */, 0 /* log_number */);
|
|
5921
6096
|
|
|
5922
|
-
ListColumnFamiliesHandler handler;
|
|
6097
|
+
ListColumnFamiliesHandler handler(read_options);
|
|
5923
6098
|
handler.Iterate(reader, &s);
|
|
5924
6099
|
|
|
5925
6100
|
assert(column_families);
|
|
@@ -5942,6 +6117,9 @@ Status VersionSet::ReduceNumberOfLevels(const std::string& dbname,
|
|
|
5942
6117
|
"Number of levels needs to be bigger than 1");
|
|
5943
6118
|
}
|
|
5944
6119
|
|
|
6120
|
+
// TODO: plumb Env::IOActivity
|
|
6121
|
+
const ReadOptions read_options;
|
|
6122
|
+
|
|
5945
6123
|
ImmutableDBOptions db_options(*options);
|
|
5946
6124
|
ColumnFamilyOptions cf_options(*options);
|
|
5947
6125
|
std::shared_ptr<Cache> tc(NewLRUCache(options->max_open_files - 10,
|
|
@@ -6029,8 +6207,8 @@ Status VersionSet::ReduceNumberOfLevels(const std::string& dbname,
|
|
|
6029
6207
|
InstrumentedMutex dummy_mutex;
|
|
6030
6208
|
InstrumentedMutexLock l(&dummy_mutex);
|
|
6031
6209
|
return versions.LogAndApply(versions.GetColumnFamilySet()->GetDefault(),
|
|
6032
|
-
mutable_cf_options,
|
|
6033
|
-
true);
|
|
6210
|
+
mutable_cf_options, read_options, &ve,
|
|
6211
|
+
&dummy_mutex, nullptr, true);
|
|
6034
6212
|
}
|
|
6035
6213
|
|
|
6036
6214
|
// Get the checksum information including the checksum and checksum function
|
|
@@ -6103,6 +6281,9 @@ Status VersionSet::GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) {
|
|
|
6103
6281
|
Status VersionSet::DumpManifest(Options& options, std::string& dscname,
|
|
6104
6282
|
bool verbose, bool hex, bool json) {
|
|
6105
6283
|
assert(options.env);
|
|
6284
|
+
// TODO: plumb Env::IOActivity
|
|
6285
|
+
const ReadOptions read_options;
|
|
6286
|
+
|
|
6106
6287
|
std::vector<std::string> column_families;
|
|
6107
6288
|
Status s = ListColumnFamiliesFromManifest(
|
|
6108
6289
|
dscname, options.env->GetFileSystem().get(), &column_families);
|
|
@@ -6129,7 +6310,8 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname,
|
|
|
6129
6310
|
cf_descs.emplace_back(cf, options);
|
|
6130
6311
|
}
|
|
6131
6312
|
|
|
6132
|
-
DumpManifestHandler handler(cf_descs, this, io_tracer_,
|
|
6313
|
+
DumpManifestHandler handler(cf_descs, this, io_tracer_, read_options, verbose,
|
|
6314
|
+
hex, json);
|
|
6133
6315
|
{
|
|
6134
6316
|
VersionSet::LogReporter reporter;
|
|
6135
6317
|
reporter.status = &s;
|
|
@@ -6267,7 +6449,7 @@ Status VersionSet::WriteCurrentStateToManifest(
|
|
|
6267
6449
|
f->oldest_blob_file_number, f->oldest_ancester_time,
|
|
6268
6450
|
f->file_creation_time, f->epoch_number, f->file_checksum,
|
|
6269
6451
|
f->file_checksum_func_name, f->unique_id,
|
|
6270
|
-
f->compensated_range_deletion_size);
|
|
6452
|
+
f->compensated_range_deletion_size, f->tail_size);
|
|
6271
6453
|
}
|
|
6272
6454
|
}
|
|
6273
6455
|
|
|
@@ -6332,6 +6514,7 @@ Status VersionSet::WriteCurrentStateToManifest(
|
|
|
6332
6514
|
// we avoid doing binary search for the keys b and c twice and instead somehow
|
|
6333
6515
|
// maintain state of where they first appear in the files.
|
|
6334
6516
|
uint64_t VersionSet::ApproximateSize(const SizeApproximationOptions& options,
|
|
6517
|
+
const ReadOptions& read_options,
|
|
6335
6518
|
Version* v, const Slice& start,
|
|
6336
6519
|
const Slice& end, int start_level,
|
|
6337
6520
|
int end_level, TableReaderCaller caller) {
|
|
@@ -6411,8 +6594,8 @@ uint64_t VersionSet::ApproximateSize(const SizeApproximationOptions& options,
|
|
|
6411
6594
|
for (int i = idx_start + 1; i < idx_end; ++i) {
|
|
6412
6595
|
uint64_t file_size = files_brief.files[i].fd.GetFileSize();
|
|
6413
6596
|
// The entire file falls into the range, so we can just take its size.
|
|
6414
|
-
assert(file_size ==
|
|
6415
|
-
|
|
6597
|
+
assert(file_size == ApproximateSize(read_options, v, files_brief.files[i],
|
|
6598
|
+
start, end, caller));
|
|
6416
6599
|
total_full_size += file_size;
|
|
6417
6600
|
}
|
|
6418
6601
|
|
|
@@ -6447,21 +6630,24 @@ uint64_t VersionSet::ApproximateSize(const SizeApproximationOptions& options,
|
|
|
6447
6630
|
// Estimate for all the first files (might also be last files), at each
|
|
6448
6631
|
// level
|
|
6449
6632
|
for (const auto file_ptr : first_files) {
|
|
6450
|
-
total_full_size +=
|
|
6633
|
+
total_full_size +=
|
|
6634
|
+
ApproximateSize(read_options, v, *file_ptr, start, end, caller);
|
|
6451
6635
|
}
|
|
6452
6636
|
|
|
6453
6637
|
// Estimate for all the last files, at each level
|
|
6454
6638
|
for (const auto file_ptr : last_files) {
|
|
6455
6639
|
// We could use ApproximateSize here, but calling ApproximateOffsetOf
|
|
6456
6640
|
// directly is just more efficient.
|
|
6457
|
-
total_full_size +=
|
|
6641
|
+
total_full_size +=
|
|
6642
|
+
ApproximateOffsetOf(read_options, v, *file_ptr, end, caller);
|
|
6458
6643
|
}
|
|
6459
6644
|
}
|
|
6460
6645
|
|
|
6461
6646
|
return total_full_size;
|
|
6462
6647
|
}
|
|
6463
6648
|
|
|
6464
|
-
uint64_t VersionSet::ApproximateOffsetOf(
|
|
6649
|
+
uint64_t VersionSet::ApproximateOffsetOf(const ReadOptions& read_options,
|
|
6650
|
+
Version* v, const FdWithKeyRange& f,
|
|
6465
6651
|
const Slice& key,
|
|
6466
6652
|
TableReaderCaller caller) {
|
|
6467
6653
|
// pre-condition
|
|
@@ -6479,16 +6665,18 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const FdWithKeyRange& f,
|
|
|
6479
6665
|
// "key" falls in the range for this table. Add the
|
|
6480
6666
|
// approximate offset of "key" within the table.
|
|
6481
6667
|
TableCache* table_cache = v->cfd_->table_cache();
|
|
6668
|
+
const MutableCFOptions& cf_opts = v->GetMutableCFOptions();
|
|
6482
6669
|
if (table_cache != nullptr) {
|
|
6483
6670
|
result = table_cache->ApproximateOffsetOf(
|
|
6484
|
-
key, *f.file_metadata, caller, icmp,
|
|
6485
|
-
|
|
6671
|
+
read_options, key, *f.file_metadata, caller, icmp,
|
|
6672
|
+
cf_opts.block_protection_bytes_per_key, cf_opts.prefix_extractor);
|
|
6486
6673
|
}
|
|
6487
6674
|
}
|
|
6488
6675
|
return result;
|
|
6489
6676
|
}
|
|
6490
6677
|
|
|
6491
|
-
uint64_t VersionSet::ApproximateSize(
|
|
6678
|
+
uint64_t VersionSet::ApproximateSize(const ReadOptions& read_options,
|
|
6679
|
+
Version* v, const FdWithKeyRange& f,
|
|
6492
6680
|
const Slice& start, const Slice& end,
|
|
6493
6681
|
TableReaderCaller caller) {
|
|
6494
6682
|
// pre-condition
|
|
@@ -6504,13 +6692,14 @@ uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f,
|
|
|
6504
6692
|
|
|
6505
6693
|
if (icmp.Compare(f.smallest_key, start) >= 0) {
|
|
6506
6694
|
// Start of the range is before the file start - approximate by end offset
|
|
6507
|
-
return ApproximateOffsetOf(v, f, end, caller);
|
|
6695
|
+
return ApproximateOffsetOf(read_options, v, f, end, caller);
|
|
6508
6696
|
}
|
|
6509
6697
|
|
|
6510
6698
|
if (icmp.Compare(f.largest_key, end) < 0) {
|
|
6511
6699
|
// End of the range is after the file end - approximate by subtracting
|
|
6512
6700
|
// start offset from the file size
|
|
6513
|
-
uint64_t start_offset =
|
|
6701
|
+
uint64_t start_offset =
|
|
6702
|
+
ApproximateOffsetOf(read_options, v, f, start, caller);
|
|
6514
6703
|
assert(f.fd.GetFileSize() >= start_offset);
|
|
6515
6704
|
return f.fd.GetFileSize() - start_offset;
|
|
6516
6705
|
}
|
|
@@ -6520,9 +6709,10 @@ uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f,
|
|
|
6520
6709
|
if (table_cache == nullptr) {
|
|
6521
6710
|
return 0;
|
|
6522
6711
|
}
|
|
6712
|
+
const MutableCFOptions& cf_opts = v->GetMutableCFOptions();
|
|
6523
6713
|
return table_cache->ApproximateSize(
|
|
6524
|
-
start, end, *f.file_metadata, caller, icmp,
|
|
6525
|
-
|
|
6714
|
+
read_options, start, end, *f.file_metadata, caller, icmp,
|
|
6715
|
+
cf_opts.block_protection_bytes_per_key, cf_opts.prefix_extractor);
|
|
6526
6716
|
}
|
|
6527
6717
|
|
|
6528
6718
|
void VersionSet::RemoveLiveFiles(
|
|
@@ -6681,6 +6871,7 @@ InternalIterator* VersionSet::MakeInputIterator(
|
|
|
6681
6871
|
/*smallest_compaction_key=*/nullptr,
|
|
6682
6872
|
/*largest_compaction_key=*/nullptr,
|
|
6683
6873
|
/*allow_unprepared_value=*/false,
|
|
6874
|
+
c->mutable_cf_options()->block_protection_bytes_per_key,
|
|
6684
6875
|
/*range_del_iter=*/&range_tombstone_iter);
|
|
6685
6876
|
range_tombstones.emplace_back(range_tombstone_iter, nullptr);
|
|
6686
6877
|
}
|
|
@@ -6694,8 +6885,9 @@ InternalIterator* VersionSet::MakeInputIterator(
|
|
|
6694
6885
|
/*should_sample=*/false,
|
|
6695
6886
|
/*no per level latency histogram=*/nullptr,
|
|
6696
6887
|
TableReaderCaller::kCompaction, /*skip_filters=*/false,
|
|
6697
|
-
/*level=*/static_cast<int>(c->level(which)),
|
|
6698
|
-
c->
|
|
6888
|
+
/*level=*/static_cast<int>(c->level(which)),
|
|
6889
|
+
c->mutable_cf_options()->block_protection_bytes_per_key,
|
|
6890
|
+
range_del_agg, c->boundaries(which), false, &tombstone_iter_ptr);
|
|
6699
6891
|
range_tombstones.emplace_back(nullptr, tombstone_iter_ptr);
|
|
6700
6892
|
}
|
|
6701
6893
|
}
|
|
@@ -6812,7 +7004,8 @@ void VersionSet::GetObsoleteFiles(std::vector<ObsoleteFileInfo>* files,
|
|
|
6812
7004
|
}
|
|
6813
7005
|
|
|
6814
7006
|
ColumnFamilyData* VersionSet::CreateColumnFamily(
|
|
6815
|
-
const ColumnFamilyOptions& cf_options, const
|
|
7007
|
+
const ColumnFamilyOptions& cf_options, const ReadOptions& read_options,
|
|
7008
|
+
const VersionEdit* edit) {
|
|
6816
7009
|
assert(edit->is_column_family_add_);
|
|
6817
7010
|
|
|
6818
7011
|
MutableCFOptions dummy_cf_options;
|
|
@@ -6831,7 +7024,8 @@ ColumnFamilyData* VersionSet::CreateColumnFamily(
|
|
|
6831
7024
|
|
|
6832
7025
|
constexpr bool update_stats = false;
|
|
6833
7026
|
|
|
6834
|
-
v->PrepareAppend(*new_cfd->GetLatestMutableCFOptions(),
|
|
7027
|
+
v->PrepareAppend(*new_cfd->GetLatestMutableCFOptions(), read_options,
|
|
7028
|
+
update_stats);
|
|
6835
7029
|
|
|
6836
7030
|
AppendVersion(new_cfd, v);
|
|
6837
7031
|
// GetLatestMutableCFOptions() is safe here without mutex since the
|
|
@@ -6896,7 +7090,8 @@ uint64_t VersionSet::GetTotalBlobFileSize(Version* dummy_versions) {
|
|
|
6896
7090
|
return all_versions_blob_file_size;
|
|
6897
7091
|
}
|
|
6898
7092
|
|
|
6899
|
-
Status VersionSet::VerifyFileMetadata(
|
|
7093
|
+
Status VersionSet::VerifyFileMetadata(const ReadOptions& read_options,
|
|
7094
|
+
ColumnFamilyData* cfd,
|
|
6900
7095
|
const std::string& fpath, int level,
|
|
6901
7096
|
const FileMetaData& meta) {
|
|
6902
7097
|
uint64_t fsize = 0;
|
|
@@ -6929,7 +7124,8 @@ Status VersionSet::VerifyFileMetadata(ColumnFamilyData* cfd,
|
|
|
6929
7124
|
TableCache::TypedHandle* handle = nullptr;
|
|
6930
7125
|
FileMetaData meta_copy = meta;
|
|
6931
7126
|
status = table_cache->FindTable(
|
|
6932
|
-
|
|
7127
|
+
read_options, file_opts, *icmp, meta_copy, &handle,
|
|
7128
|
+
cf_opts->block_protection_bytes_per_key, pe,
|
|
6933
7129
|
/*no_io=*/false, /*record_read_stats=*/true,
|
|
6934
7130
|
internal_stats->GetFileReadHist(level), false, level,
|
|
6935
7131
|
/*prefetch_index_and_filter_in_cache*/ false, max_sz_for_l0_meta_pin,
|
|
@@ -6973,9 +7169,9 @@ Status ReactiveVersionSet::Recover(
|
|
|
6973
7169
|
log::Reader* reader = manifest_reader->get();
|
|
6974
7170
|
assert(reader);
|
|
6975
7171
|
|
|
6976
|
-
manifest_tailer_.reset(
|
|
6977
|
-
|
|
6978
|
-
|
|
7172
|
+
manifest_tailer_.reset(new ManifestTailer(
|
|
7173
|
+
column_families, const_cast<ReactiveVersionSet*>(this), io_tracer_,
|
|
7174
|
+
read_options_, EpochNumberRequirement::kMightMissing));
|
|
6979
7175
|
|
|
6980
7176
|
manifest_tailer_->Iterate(*reader, manifest_reader_status->get());
|
|
6981
7177
|
|