@nxtedition/rocksdb 15.4.0 → 15.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +24 -19
- package/cache.js +1 -1
- package/chained-batch.js +12 -3
- package/deps/rocksdb/rocksdb/.clang-tidy +86 -0
- package/deps/rocksdb/rocksdb/BUCK +42 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +11 -0
- package/deps/rocksdb/rocksdb/Makefile +59 -32
- package/deps/rocksdb/rocksdb/cache/cache.cc +0 -5
- package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +9 -9
- package/deps/rocksdb/rocksdb/cache/cache_key.cc +3 -3
- package/deps/rocksdb/rocksdb/cache/cache_key.h +5 -5
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +16 -16
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +258 -294
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +98 -49
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -5
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +2 -3
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +18 -18
- package/deps/rocksdb/rocksdb/crash_test.mk +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -22
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +6 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +14 -16
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +38 -26
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +101 -18
- package/deps/rocksdb/rocksdb/db/blob/blob_index.h +12 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +6 -9
- package/deps/rocksdb/rocksdb/db/builder.cc +23 -0
- package/deps/rocksdb/rocksdb/db/builder.h +7 -0
- package/deps/rocksdb/rocksdb/db/c.cc +373 -57
- package/deps/rocksdb/rocksdb/db/c_test.c +101 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +31 -3
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +10 -13
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +35 -48
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +13 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +201 -39
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +7 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -455
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +19 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +72 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +405 -83
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +25 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1410 -106
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +19 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +505 -45
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +9 -1
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +7 -9
- package/deps/rocksdb/rocksdb/db/convenience.cc +4 -4
- package/deps/rocksdb/rocksdb/db/convenience_impl.h +2 -1
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +60 -88
- package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +10 -12
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +471 -40
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +116 -2
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +5 -15
- package/deps/rocksdb/rocksdb/db/db_compaction_abort_test.cc +993 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +329 -29
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +155 -13
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +54 -31
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +232 -70
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +57 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +224 -31
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +4 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +164 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +47 -35
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +22 -9
- package/deps/rocksdb/rocksdb/db/db_iter.cc +9 -0
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +371 -6
- package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -5
- package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +22 -23
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +0 -2
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +40 -0
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +32 -13
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +68 -15
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +2 -3
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -21
- package/deps/rocksdb/rocksdb/db/db_test.cc +644 -128
- package/deps/rocksdb/rocksdb/db/db_test2.cc +198 -81
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +35 -10
- package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -2
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +36 -32
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +11 -7
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +499 -0
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +284 -20
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +3 -3
- package/deps/rocksdb/rocksdb/db/dbformat.h +0 -5
- package/deps/rocksdb/rocksdb/db/error_handler.cc +24 -0
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +12 -14
- package/deps/rocksdb/rocksdb/db/experimental.cc +13 -10
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +22 -3
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +21 -15
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +4 -6
- package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -3
- package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +5 -6
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +4 -2
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +17 -17
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
- package/deps/rocksdb/rocksdb/db/listener_test.cc +154 -27
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/memtable.cc +197 -51
- package/deps/rocksdb/rocksdb/db/memtable.h +6 -0
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +3 -4
- package/deps/rocksdb/rocksdb/db/merge_test.cc +37 -35
- package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/options_file_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +9 -11
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +10 -1
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +292 -15
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +10 -17
- package/deps/rocksdb/rocksdb/db/prefix_test.cc +6 -8
- package/deps/rocksdb/rocksdb/db/repair.cc +10 -10
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/table_cache.cc +142 -135
- package/deps/rocksdb/rocksdb/db/table_cache.h +30 -6
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_builder.cc +11 -50
- package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/version_edit.cc +51 -2
- package/deps/rocksdb/rocksdb/db/version_edit.h +91 -29
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_set.cc +211 -50
- package/deps/rocksdb/rocksdb/db/version_set.h +40 -3
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +294 -21
- package/deps/rocksdb/rocksdb/db/version_util.cc +96 -0
- package/deps/rocksdb/rocksdb/db/version_util.h +24 -0
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +647 -31
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +219 -1
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +549 -12
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +21 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +32 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +74 -22
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +9 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -61
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +76 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +92 -72
- package/deps/rocksdb/rocksdb/env/env.cc +1 -0
- package/deps/rocksdb/rocksdb/env/env_test.cc +365 -2
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +31 -30
- package/deps/rocksdb/rocksdb/env/io_posix.cc +8 -11
- package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
- package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -1
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +108 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +32 -4
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +4 -4
- package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
- package/deps/rocksdb/rocksdb/file/file_util.h +2 -1
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +331 -12
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +52 -35
- package/deps/rocksdb/rocksdb/folly.mk +22 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +100 -54
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +67 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +149 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +78 -97
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +17 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/io_dispatcher.h +358 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +43 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +20 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +63 -21
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +10 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +2 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -14
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +49 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +77 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +15 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +16 -10
- package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +5 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +2 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +106 -46
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +14 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +5 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -2
- package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +2 -2
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +226 -8
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +490 -0
- package/deps/rocksdb/rocksdb/memtable/skiplist.h +3 -3
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +11 -0
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +4 -12
- package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +5 -5
- package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +21 -4
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +9 -3
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +21 -2
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +2 -2
- package/deps/rocksdb/rocksdb/options/cf_options.cc +21 -1
- package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -2
- package/deps/rocksdb/rocksdb/options/db_options.cc +26 -5
- package/deps/rocksdb/rocksdb/options/db_options.h +3 -1
- package/deps/rocksdb/rocksdb/options/options.cc +5 -1
- package/deps/rocksdb/rocksdb/options/options_helper.cc +7 -2
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +109 -103
- package/deps/rocksdb/rocksdb/options/options_test.cc +14 -0
- package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +15 -17
- package/deps/rocksdb/rocksdb/port/lang.h +4 -0
- package/deps/rocksdb/rocksdb/port/port_example.h +0 -23
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +36 -0
- package/deps/rocksdb/rocksdb/port/stack_trace.h +9 -0
- package/deps/rocksdb/rocksdb/src.mk +12 -0
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +571 -292
- package/deps/rocksdb/rocksdb/table/block_based/block.h +143 -53
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +154 -90
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +5 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +51 -14
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +0 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +147 -734
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +30 -233
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +178 -108
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +13 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +17 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +70 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +168 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -9
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +7 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +9 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +548 -169
- package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_util.h +156 -0
- package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +73 -30
- package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +74 -7
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +20 -14
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +22 -12
- package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.cc +332 -0
- package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.h +133 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +3 -2
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +0 -1
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +126 -46
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +31 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +1 -2
- package/deps/rocksdb/rocksdb/table/cleanable_test.cc +3 -1
- package/deps/rocksdb/rocksdb/table/external_table.cc +25 -4
- package/deps/rocksdb/rocksdb/table/format.cc +27 -15
- package/deps/rocksdb/rocksdb/table/format.h +41 -15
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -0
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +22 -12
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +0 -1
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +7 -21
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +88 -13
- package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +53 -42
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +3 -12
- package/deps/rocksdb/rocksdb/table/table_builder.h +0 -4
- package/deps/rocksdb/rocksdb/table/table_properties.cc +18 -0
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -3
- package/deps/rocksdb/rocksdb/table/table_test.cc +848 -172
- package/deps/rocksdb/rocksdb/table/unique_id.cc +24 -20
- package/deps/rocksdb/rocksdb/table/unique_id_impl.h +8 -8
- package/deps/rocksdb/rocksdb/test_util/sync_point.h +5 -4
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -1
- package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -2
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +2 -1
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +238 -120
- package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +2 -2
- package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +2 -4
- package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +4 -8
- package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +2 -3
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +82 -20
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +41 -47
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -0
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +5 -6
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +6 -5
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +4 -4
- package/deps/rocksdb/rocksdb/tools/write_stress.cc +1 -3
- package/deps/rocksdb/rocksdb/util/atomic.h +30 -23
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +6 -7
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +3 -3
- package/deps/rocksdb/rocksdb/util/bit_fields.h +68 -46
- package/deps/rocksdb/rocksdb/util/bloom_impl.h +16 -16
- package/deps/rocksdb/rocksdb/util/coding.h +14 -27
- package/deps/rocksdb/rocksdb/util/compression.cc +365 -207
- package/deps/rocksdb/rocksdb/util/compression.h +16 -1298
- package/deps/rocksdb/rocksdb/util/compression_test.cc +347 -61
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +8 -9
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +1 -1
- package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +1 -1
- package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +3 -3
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +18 -18
- package/deps/rocksdb/rocksdb/util/gflags_compat.h +3 -3
- package/deps/rocksdb/rocksdb/util/hash_test.cc +19 -7
- package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.cc +1099 -0
- package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.h +36 -0
- package/deps/rocksdb/rocksdb/util/io_dispatcher_test.cc +1919 -0
- package/deps/rocksdb/rocksdb/util/math.h +3 -1
- package/deps/rocksdb/rocksdb/util/mutexlock.h +19 -19
- package/deps/rocksdb/rocksdb/util/ribbon_alg.h +25 -25
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +5 -7
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -5
- package/deps/rocksdb/rocksdb/util/slice.cc +0 -10
- package/deps/rocksdb/rocksdb/util/slice_test.cc +35 -1
- package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +5 -7
- package/deps/rocksdb/rocksdb/util/status.cc +3 -1
- package/deps/rocksdb/rocksdb/util/stop_watch.h +2 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -1
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +123 -78
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +12 -93
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +0 -21
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +6 -48
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +94 -307
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +12 -58
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +2 -8
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +2 -3
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +205 -811
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +18 -9
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +2 -7
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +1 -9
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +17 -11
- package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +68 -61
- package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +105 -59
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -7
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs_test.cc +94 -0
- package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +13 -17
- package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +16 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +25 -25
- package/deps/rocksdb/rocksdb/utilities/object_registry.cc +40 -40
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +2 -5
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +17 -19
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +4 -13
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +3 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_seqno_test.cc +431 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +91 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.cc +562 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.h +615 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.cc +2575 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.h +685 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_db_test.cc +2843 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.cc +567 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.h +275 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_test.cc +5183 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +4 -3
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +3 -3
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +93 -88
- package/deps/rocksdb/rocksdb.gyp +7 -0
- package/index.js +11 -2
- package/iterator.js +15 -7
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +0 -43
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +0 -55
|
@@ -170,7 +170,7 @@ class FilePicker {
|
|
|
170
170
|
if (!search_ended_) {
|
|
171
171
|
// Prefetch Level 0 table data to avoid cache miss if possible.
|
|
172
172
|
for (unsigned int i = 0; i < (*level_files_brief_)[0].num_files; ++i) {
|
|
173
|
-
auto* r = (*level_files_brief_)[0].files[i].fd.
|
|
173
|
+
auto* r = (*level_files_brief_)[0].files[i].fd.pinned_reader.Get();
|
|
174
174
|
if (r) {
|
|
175
175
|
r->Prepare(ikey);
|
|
176
176
|
}
|
|
@@ -395,7 +395,7 @@ class FilePickerMultiGet {
|
|
|
395
395
|
// prefetching. This may not be necessary anymore once we implement
|
|
396
396
|
// batching in those table readers
|
|
397
397
|
for (unsigned int i = 0; i < (*level_files_brief_)[0].num_files; ++i) {
|
|
398
|
-
auto* r = (*level_files_brief_)[0].files[i].fd.
|
|
398
|
+
auto* r = (*level_files_brief_)[0].files[i].fd.pinned_reader.Get();
|
|
399
399
|
if (r) {
|
|
400
400
|
for (auto iter = range_.begin(); iter != range_.end(); ++iter) {
|
|
401
401
|
r->Prepare(iter->ikey);
|
|
@@ -974,9 +974,8 @@ class LevelIterator final : public InternalIterator {
|
|
|
974
974
|
TableCache* table_cache, const ReadOptions& read_options,
|
|
975
975
|
const FileOptions& file_options, const InternalKeyComparator& icomparator,
|
|
976
976
|
const LevelFilesBrief* flevel, const MutableCFOptions& mutable_cf_options,
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
RangeDelAggregator* range_del_agg,
|
|
977
|
+
HistogramImpl* file_read_hist, TableReaderCaller caller,
|
|
978
|
+
bool skip_filters, int level, RangeDelAggregator* range_del_agg,
|
|
980
979
|
const std::vector<AtomicCompactionUnitBoundary>* compaction_boundaries =
|
|
981
980
|
nullptr,
|
|
982
981
|
bool allow_unprepared_value = false,
|
|
@@ -1002,7 +1001,6 @@ class LevelIterator final : public InternalIterator {
|
|
|
1002
1001
|
? read_options.snapshot->GetSequenceNumber()
|
|
1003
1002
|
: kMaxSequenceNumber),
|
|
1004
1003
|
level_(level),
|
|
1005
|
-
should_sample_(should_sample),
|
|
1006
1004
|
skip_filters_(skip_filters),
|
|
1007
1005
|
allow_unprepared_value_(allow_unprepared_value),
|
|
1008
1006
|
is_next_read_sequential_(false),
|
|
@@ -1267,6 +1265,26 @@ class LevelIterator final : public InternalIterator {
|
|
|
1267
1265
|
*read_options_.iterate_upper_bound, /*b_has_ts=*/false) >= 0;
|
|
1268
1266
|
}
|
|
1269
1267
|
|
|
1268
|
+
template <bool IsSeek>
|
|
1269
|
+
void SampleRead() {
|
|
1270
|
+
bool sampled =
|
|
1271
|
+
IsSeek ? should_sample_file_read() : should_sample_file_read_next();
|
|
1272
|
+
if (!sampled) {
|
|
1273
|
+
return;
|
|
1274
|
+
}
|
|
1275
|
+
|
|
1276
|
+
if (file_index_ >= flevel_->num_files || !file_iter_.Valid()) {
|
|
1277
|
+
return;
|
|
1278
|
+
}
|
|
1279
|
+
const FileMetaData* meta = flevel_->files[file_index_].file_metadata;
|
|
1280
|
+
sample_file_read_inc(meta);
|
|
1281
|
+
ValueType type = ExtractValueType(file_iter_.key());
|
|
1282
|
+
if (type == kTypeDeletion || type == kTypeSingleDeletion ||
|
|
1283
|
+
type == kTypeDeletionWithTimestamp || type == kTypeMerge) {
|
|
1284
|
+
sample_collapsible_entry_file_read_inc(meta);
|
|
1285
|
+
}
|
|
1286
|
+
}
|
|
1287
|
+
|
|
1270
1288
|
void ClearRangeTombstoneIter() {
|
|
1271
1289
|
if (range_tombstone_iter_) {
|
|
1272
1290
|
range_tombstone_iter_->reset();
|
|
@@ -1279,9 +1297,6 @@ class LevelIterator final : public InternalIterator {
|
|
|
1279
1297
|
InternalIterator* NewFileIterator() {
|
|
1280
1298
|
assert(file_index_ < flevel_->num_files);
|
|
1281
1299
|
auto file_meta = flevel_->files[file_index_];
|
|
1282
|
-
if (should_sample_) {
|
|
1283
|
-
sample_file_read_inc(file_meta.file_metadata);
|
|
1284
|
-
}
|
|
1285
1300
|
|
|
1286
1301
|
const InternalKey* smallest_compaction_key = nullptr;
|
|
1287
1302
|
const InternalKey* largest_compaction_key = nullptr;
|
|
@@ -1298,7 +1313,8 @@ class LevelIterator final : public InternalIterator {
|
|
|
1298
1313
|
/*arena=*/nullptr, skip_filters_, level_,
|
|
1299
1314
|
/*max_file_size_for_l0_meta_pin=*/0, smallest_compaction_key,
|
|
1300
1315
|
largest_compaction_key, allow_unprepared_value_, &read_seq_,
|
|
1301
|
-
range_tombstone_iter_
|
|
1316
|
+
range_tombstone_iter_,
|
|
1317
|
+
/*maybe_pin_table_handle=*/true);
|
|
1302
1318
|
}
|
|
1303
1319
|
|
|
1304
1320
|
// Check if current file being fully within iterate_lower_bound.
|
|
@@ -1361,7 +1377,6 @@ class LevelIterator final : public InternalIterator {
|
|
|
1361
1377
|
SequenceNumber read_seq_;
|
|
1362
1378
|
|
|
1363
1379
|
int level_;
|
|
1364
|
-
bool should_sample_;
|
|
1365
1380
|
bool skip_filters_;
|
|
1366
1381
|
bool allow_unprepared_value_;
|
|
1367
1382
|
bool may_be_out_of_lower_bound_ = true;
|
|
@@ -1498,6 +1513,7 @@ void LevelIterator::Seek(const Slice& target) {
|
|
|
1498
1513
|
}
|
|
1499
1514
|
SkipEmptyFileForward();
|
|
1500
1515
|
CheckMayBeOutOfLowerBound();
|
|
1516
|
+
SampleRead<true>();
|
|
1501
1517
|
}
|
|
1502
1518
|
|
|
1503
1519
|
void LevelIterator::SeekForPrev(const Slice& target) {
|
|
@@ -1533,6 +1549,7 @@ void LevelIterator::SeekForPrev(const Slice& target) {
|
|
|
1533
1549
|
SkipEmptyFileBackward();
|
|
1534
1550
|
}
|
|
1535
1551
|
CheckMayBeOutOfLowerBound();
|
|
1552
|
+
SampleRead<true>();
|
|
1536
1553
|
}
|
|
1537
1554
|
|
|
1538
1555
|
void LevelIterator::SeekToFirst() {
|
|
@@ -1549,6 +1566,7 @@ void LevelIterator::SeekToFirst() {
|
|
|
1549
1566
|
}
|
|
1550
1567
|
SkipEmptyFileForward();
|
|
1551
1568
|
CheckMayBeOutOfLowerBound();
|
|
1569
|
+
SampleRead<true>();
|
|
1552
1570
|
}
|
|
1553
1571
|
|
|
1554
1572
|
void LevelIterator::SeekToLast() {
|
|
@@ -1563,6 +1581,7 @@ void LevelIterator::SeekToLast() {
|
|
|
1563
1581
|
}
|
|
1564
1582
|
SkipEmptyFileBackward();
|
|
1565
1583
|
CheckMayBeOutOfLowerBound();
|
|
1584
|
+
SampleRead<true>();
|
|
1566
1585
|
}
|
|
1567
1586
|
|
|
1568
1587
|
void LevelIterator::Next() {
|
|
@@ -1577,6 +1596,7 @@ void LevelIterator::Next() {
|
|
|
1577
1596
|
}
|
|
1578
1597
|
}
|
|
1579
1598
|
SkipEmptyFileForward();
|
|
1599
|
+
SampleRead<false>();
|
|
1580
1600
|
}
|
|
1581
1601
|
|
|
1582
1602
|
bool LevelIterator::NextAndGetResult(IterateResult* result) {
|
|
@@ -1610,6 +1630,7 @@ bool LevelIterator::NextAndGetResult(IterateResult* result) {
|
|
|
1610
1630
|
}
|
|
1611
1631
|
}
|
|
1612
1632
|
}
|
|
1633
|
+
SampleRead<false>();
|
|
1613
1634
|
return is_valid;
|
|
1614
1635
|
}
|
|
1615
1636
|
|
|
@@ -1624,6 +1645,7 @@ void LevelIterator::Prev() {
|
|
|
1624
1645
|
}
|
|
1625
1646
|
}
|
|
1626
1647
|
SkipEmptyFileBackward();
|
|
1648
|
+
SampleRead<false>();
|
|
1627
1649
|
}
|
|
1628
1650
|
|
|
1629
1651
|
bool LevelIterator::SkipEmptyFileForward() {
|
|
@@ -1820,8 +1842,10 @@ Status Version::GetTableProperties(const ReadOptions& read_options,
|
|
|
1820
1842
|
file_name = TableFileName(ioptions.cf_paths, file_meta->fd.GetNumber(),
|
|
1821
1843
|
file_meta->fd.GetPathId());
|
|
1822
1844
|
}
|
|
1823
|
-
|
|
1824
|
-
|
|
1845
|
+
FileOptions fopts = file_options_;
|
|
1846
|
+
fopts.file_checksum = file_meta->file_checksum;
|
|
1847
|
+
fopts.file_checksum_func_name = file_meta->file_checksum_func_name;
|
|
1848
|
+
s = ioptions.fs->NewRandomAccessFile(file_name, fopts, &file, nullptr);
|
|
1825
1849
|
if (!s.ok()) {
|
|
1826
1850
|
return s;
|
|
1827
1851
|
}
|
|
@@ -2219,7 +2243,7 @@ void Version::GetCreationTimeOfOldestFile(uint64_t* creation_time) {
|
|
|
2219
2243
|
uint64_t oldest_time = std::numeric_limits<uint64_t>::max();
|
|
2220
2244
|
for (int level = 0; level < storage_info_.num_non_empty_levels_; level++) {
|
|
2221
2245
|
for (FileMetaData* meta : storage_info_.LevelFiles(level)) {
|
|
2222
|
-
assert(meta->fd.
|
|
2246
|
+
assert(meta->fd.pinned_reader.Get() != nullptr);
|
|
2223
2247
|
uint64_t file_creation_time = meta->TryGetFileCreationTime();
|
|
2224
2248
|
if (file_creation_time == kUnknownFileCreationTime) {
|
|
2225
2249
|
*creation_time = 0;
|
|
@@ -2242,8 +2266,7 @@ InternalIterator* Version::TEST_GetLevelIterator(
|
|
|
2242
2266
|
auto level_iter = new (mem) LevelIterator(
|
|
2243
2267
|
cfd_->table_cache(), read_options, file_options_,
|
|
2244
2268
|
cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level),
|
|
2245
|
-
mutable_cf_options_,
|
|
2246
|
-
cfd_->internal_stats()->GetFileReadHist(level),
|
|
2269
|
+
mutable_cf_options_, cfd_->internal_stats()->GetFileReadHist(level),
|
|
2247
2270
|
TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
|
|
2248
2271
|
nullptr /* range_del_agg */, nullptr /* compaction_boundaries */,
|
|
2249
2272
|
allow_unprepared_value, &tombstone_iter_ptr, db_statistics_, clock_);
|
|
@@ -2339,8 +2362,6 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
|
|
|
2339
2362
|
return;
|
|
2340
2363
|
}
|
|
2341
2364
|
|
|
2342
|
-
bool should_sample = should_sample_file_read();
|
|
2343
|
-
|
|
2344
2365
|
auto* arena = merge_iter_builder->GetArena();
|
|
2345
2366
|
if (level == 0) {
|
|
2346
2367
|
// Merge all level zero files together since they may overlap
|
|
@@ -2355,7 +2376,8 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
|
|
|
2355
2376
|
/*skip_filters=*/false, /*level=*/0, max_file_size_for_l0_meta_pin_,
|
|
2356
2377
|
/*smallest_compaction_key=*/nullptr,
|
|
2357
2378
|
/*largest_compaction_key=*/nullptr, allow_unprepared_value,
|
|
2358
|
-
/*range_del_read_seqno=*/nullptr, &tombstone_iter
|
|
2379
|
+
/*range_del_read_seqno=*/nullptr, &tombstone_iter,
|
|
2380
|
+
/*maybe_pin_table_handle=*/true);
|
|
2359
2381
|
if (read_options.ignore_range_deletions) {
|
|
2360
2382
|
merge_iter_builder->AddIterator(table_iter);
|
|
2361
2383
|
} else {
|
|
@@ -2363,11 +2385,10 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
|
|
|
2363
2385
|
table_iter, std::move(tombstone_iter));
|
|
2364
2386
|
}
|
|
2365
2387
|
}
|
|
2366
|
-
if (
|
|
2388
|
+
if (should_sample_file_read()) {
|
|
2367
2389
|
// Count ones for every L0 files. This is done per iterator creation
|
|
2368
|
-
// rather than Seek(), while files in other levels are
|
|
2369
|
-
//
|
|
2370
|
-
// discrepancy here.
|
|
2390
|
+
// rather than Seek(), while files in other levels are sampled on
|
|
2391
|
+
// seek/next/prev.
|
|
2371
2392
|
for (FileMetaData* meta : storage_info_.LevelFiles(0)) {
|
|
2372
2393
|
sample_file_read_inc(meta);
|
|
2373
2394
|
}
|
|
@@ -2381,8 +2402,7 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
|
|
|
2381
2402
|
auto level_iter = new (mem) LevelIterator(
|
|
2382
2403
|
cfd_->table_cache(), read_options, soptions,
|
|
2383
2404
|
cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level),
|
|
2384
|
-
mutable_cf_options_,
|
|
2385
|
-
cfd_->internal_stats()->GetFileReadHist(level),
|
|
2405
|
+
mutable_cf_options_, cfd_->internal_stats()->GetFileReadHist(level),
|
|
2386
2406
|
TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
|
|
2387
2407
|
/*range_del_agg=*/nullptr,
|
|
2388
2408
|
/*compaction_boundaries=*/nullptr, allow_unprepared_value,
|
|
@@ -2440,8 +2460,7 @@ Status Version::OverlapWithLevelIterator(const ReadOptions& read_options,
|
|
|
2440
2460
|
ScopedArenaPtr<InternalIterator> iter(new (mem) LevelIterator(
|
|
2441
2461
|
cfd_->table_cache(), read_options, file_options,
|
|
2442
2462
|
cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level),
|
|
2443
|
-
mutable_cf_options_,
|
|
2444
|
-
cfd_->internal_stats()->GetFileReadHist(level),
|
|
2463
|
+
mutable_cf_options_, cfd_->internal_stats()->GetFileReadHist(level),
|
|
2445
2464
|
TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
|
|
2446
2465
|
&range_del_agg, nullptr, false, nullptr, db_statistics_, clock_));
|
|
2447
2466
|
status = OverlapWithIterator(ucmp, smallest_user_key, largest_user_key,
|
|
@@ -2782,9 +2801,15 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
|
|
|
2782
2801
|
switch (get_context.State()) {
|
|
2783
2802
|
case GetContext::kNotFound:
|
|
2784
2803
|
// Keep searching in other files
|
|
2804
|
+
if (get_context.sample()) {
|
|
2805
|
+
sample_collapsible_entry_file_read_inc(f->file_metadata);
|
|
2806
|
+
}
|
|
2785
2807
|
break;
|
|
2786
2808
|
case GetContext::kMerge:
|
|
2787
2809
|
// TODO: update per-level perfcontext user_key_return_count for kMerge
|
|
2810
|
+
if (get_context.sample()) {
|
|
2811
|
+
sample_collapsible_entry_file_read_inc(f->file_metadata);
|
|
2812
|
+
}
|
|
2788
2813
|
break;
|
|
2789
2814
|
case GetContext::kFound:
|
|
2790
2815
|
if (fp.GetHitFileLevel() == 0) {
|
|
@@ -2833,6 +2858,9 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
|
|
|
2833
2858
|
case GetContext::kDeleted:
|
|
2834
2859
|
// Use empty error message for speed
|
|
2835
2860
|
*status = Status::NotFound();
|
|
2861
|
+
if (get_context.sample()) {
|
|
2862
|
+
sample_collapsible_entry_file_read_inc(f->file_metadata);
|
|
2863
|
+
}
|
|
2836
2864
|
return;
|
|
2837
2865
|
case GetContext::kCorrupt:
|
|
2838
2866
|
*status = Status::Corruption("corrupted key for ", user_key);
|
|
@@ -3444,7 +3472,7 @@ bool Version::MaybeInitializeFileMetaData(const ReadOptions& read_options,
|
|
|
3444
3472
|
// Ensure new invariants on old files
|
|
3445
3473
|
file_meta->num_deletions =
|
|
3446
3474
|
std::max(tp->num_deletions, tp->num_range_deletions);
|
|
3447
|
-
file_meta->num_entries = std::max(tp->num_entries,
|
|
3475
|
+
file_meta->num_entries = std::max(tp->num_entries, file_meta->num_deletions);
|
|
3448
3476
|
return true;
|
|
3449
3477
|
}
|
|
3450
3478
|
|
|
@@ -3737,7 +3765,8 @@ bool ShouldChangeFileTemperature(const ImmutableOptions& ioptions,
|
|
|
3737
3765
|
|
|
3738
3766
|
void VersionStorageInfo::ComputeCompactionScore(
|
|
3739
3767
|
const ImmutableOptions& immutable_options,
|
|
3740
|
-
const MutableCFOptions& mutable_cf_options
|
|
3768
|
+
const MutableCFOptions& mutable_cf_options,
|
|
3769
|
+
const std::string& full_history_ts_low) {
|
|
3741
3770
|
double total_downcompact_bytes = 0.0;
|
|
3742
3771
|
// Historically, score is defined as actual bytes in a level divided by
|
|
3743
3772
|
// the level's target size, and 1.0 is the threshold for triggering
|
|
@@ -3791,15 +3820,20 @@ void VersionStorageInfo::ComputeCompactionScore(
|
|
|
3791
3820
|
}
|
|
3792
3821
|
|
|
3793
3822
|
if (compaction_style_ == kCompactionStyleFIFO) {
|
|
3794
|
-
auto
|
|
3795
|
-
|
|
3796
|
-
|
|
3823
|
+
const auto& fifo_opts = mutable_cf_options.compaction_options_fifo;
|
|
3824
|
+
uint64_t effective_size = total_size;
|
|
3825
|
+
uint64_t effective_max = fifo_opts.max_table_files_size;
|
|
3826
|
+
if (fifo_opts.max_data_files_size > 0) {
|
|
3827
|
+
// Blob-aware: include blob file sizes in the total
|
|
3828
|
+
effective_size += GetBlobStats().total_file_size;
|
|
3829
|
+
effective_max = fifo_opts.max_data_files_size;
|
|
3830
|
+
}
|
|
3831
|
+
if (effective_max == 0) {
|
|
3797
3832
|
// avoid divide 0
|
|
3798
|
-
|
|
3833
|
+
effective_max = 1;
|
|
3799
3834
|
}
|
|
3800
|
-
score = static_cast<double>(
|
|
3801
|
-
if (score < 1 &&
|
|
3802
|
-
mutable_cf_options.compaction_options_fifo.allow_compaction) {
|
|
3835
|
+
score = static_cast<double>(effective_size) / effective_max;
|
|
3836
|
+
if (score < 1 && fifo_opts.allow_compaction) {
|
|
3803
3837
|
score = std::max(
|
|
3804
3838
|
static_cast<double>(num_sorted_runs) /
|
|
3805
3839
|
mutable_cf_options.level0_file_num_compaction_trigger,
|
|
@@ -3936,7 +3970,8 @@ void VersionStorageInfo::ComputeCompactionScore(
|
|
|
3936
3970
|
ComputeFilesMarkedForCompaction(max_output_level);
|
|
3937
3971
|
ComputeBottommostFilesMarkedForCompaction(
|
|
3938
3972
|
immutable_options.cf_allow_ingest_behind ||
|
|
3939
|
-
|
|
3973
|
+
immutable_options.allow_ingest_behind,
|
|
3974
|
+
immutable_options.user_comparator, full_history_ts_low);
|
|
3940
3975
|
ComputeExpiredTtlFiles(immutable_options, mutable_cf_options.ttl);
|
|
3941
3976
|
ComputeFilesMarkedForPeriodicCompaction(
|
|
3942
3977
|
immutable_options, mutable_cf_options.periodic_compaction_seconds,
|
|
@@ -4527,17 +4562,20 @@ void VersionStorageInfo::GenerateFileLocationIndex() {
|
|
|
4527
4562
|
}
|
|
4528
4563
|
}
|
|
4529
4564
|
|
|
4530
|
-
void VersionStorageInfo::UpdateOldestSnapshot(
|
|
4531
|
-
|
|
4565
|
+
void VersionStorageInfo::UpdateOldestSnapshot(
|
|
4566
|
+
SequenceNumber seqnum, bool allow_ingest_behind, const Comparator* ucmp,
|
|
4567
|
+
const std::string& full_history_ts_low) {
|
|
4532
4568
|
assert(seqnum >= oldest_snapshot_seqnum_);
|
|
4533
4569
|
oldest_snapshot_seqnum_ = seqnum;
|
|
4534
4570
|
if (oldest_snapshot_seqnum_ > bottommost_files_mark_threshold_) {
|
|
4535
|
-
ComputeBottommostFilesMarkedForCompaction(allow_ingest_behind
|
|
4571
|
+
ComputeBottommostFilesMarkedForCompaction(allow_ingest_behind, ucmp,
|
|
4572
|
+
full_history_ts_low);
|
|
4536
4573
|
}
|
|
4537
4574
|
}
|
|
4538
4575
|
|
|
4539
4576
|
void VersionStorageInfo::ComputeBottommostFilesMarkedForCompaction(
|
|
4540
|
-
bool allow_ingest_behind
|
|
4577
|
+
bool allow_ingest_behind, const Comparator* ucmp,
|
|
4578
|
+
const std::string& full_history_ts_low) {
|
|
4541
4579
|
bottommost_files_marked_for_compaction_.clear();
|
|
4542
4580
|
bottommost_files_mark_threshold_ = kMaxSequenceNumber;
|
|
4543
4581
|
if (allow_ingest_behind) {
|
|
@@ -4558,12 +4596,39 @@ void VersionStorageInfo::ComputeBottommostFilesMarkedForCompaction(
|
|
|
4558
4596
|
current_time - static_cast<int64_t>(bottommost_file_compaction_delay_);
|
|
4559
4597
|
}
|
|
4560
4598
|
|
|
4599
|
+
// For UDT, we need to check if the file's max timestamp is below
|
|
4600
|
+
// full_history_ts_low. If not, the compaction won't be able to collapse the
|
|
4601
|
+
// timestamp to clean up the tombstone, so marking the file would be futile
|
|
4602
|
+
// and could cause an infinite compaction loop.
|
|
4603
|
+
const bool has_udt = ucmp && ucmp->timestamp_size() > 0;
|
|
4604
|
+
|
|
4561
4605
|
for (auto& level_and_file : bottommost_files_) {
|
|
4562
4606
|
if (!level_and_file.second->being_compacted &&
|
|
4563
4607
|
level_and_file.second->fd.largest_seqno != 0) {
|
|
4564
4608
|
// largest_seqno might be nonzero due to containing the final key in an
|
|
4565
4609
|
// earlier compaction, whose seqnum we didn't zero out.
|
|
4566
4610
|
if (level_and_file.second->fd.largest_seqno < oldest_snapshot_seqnum_) {
|
|
4611
|
+
if (has_udt) {
|
|
4612
|
+
const std::string& max_ts = level_and_file.second->max_timestamp;
|
|
4613
|
+
// If max_timestamp is empty, the file could come from a very old
|
|
4614
|
+
// version which does not have timestamp. In that case, we should pick
|
|
4615
|
+
// the file for compaction. After compaction, the file will have
|
|
4616
|
+
// max_timestamp set properly.
|
|
4617
|
+
if (!max_ts.empty()) {
|
|
4618
|
+
// If full_history_ts_low is empty, it means it was never set, which
|
|
4619
|
+
// means its value is 0. Therefore, it would be always smaller than
|
|
4620
|
+
// max_timestamp
|
|
4621
|
+
if (full_history_ts_low.empty()) {
|
|
4622
|
+
continue;
|
|
4623
|
+
}
|
|
4624
|
+
// If max timestamp >= full_history_ts_low, skip this file
|
|
4625
|
+
if (ucmp->CompareTimestamp(Slice(max_ts), full_history_ts_low) >=
|
|
4626
|
+
0) {
|
|
4627
|
+
continue;
|
|
4628
|
+
}
|
|
4629
|
+
}
|
|
4630
|
+
}
|
|
4631
|
+
|
|
4567
4632
|
if (!needs_delay) {
|
|
4568
4633
|
bottommost_files_marked_for_compaction_.push_back(level_and_file);
|
|
4569
4634
|
} else if (creation_time_ub > 0) {
|
|
@@ -5542,6 +5607,96 @@ Status VersionSet::Close(FSDirectory* db_dir, InstrumentedMutex* mu) {
|
|
|
5542
5607
|
s = LogAndApply(cfd, ReadOptions(), WriteOptions(), &edit, mu, db_dir);
|
|
5543
5608
|
}
|
|
5544
5609
|
|
|
5610
|
+
// Content validation: read back the manifest and verify CRC + decode.
|
|
5611
|
+
// Loop up to 2 checks with 1 rewrite attempt in between, so we also verify
|
|
5612
|
+
// the rewritten manifest is healthy.
|
|
5613
|
+
if (s.ok() && verify_manifest_content_on_close_) {
|
|
5614
|
+
TEST_SYNC_POINT("VersionSet::Close:BeforeContentValidation");
|
|
5615
|
+
constexpr int kMaxContentChecks = 2;
|
|
5616
|
+
for (int content_check = 0; s.ok() && content_check < kMaxContentChecks;
|
|
5617
|
+
++content_check) {
|
|
5618
|
+
// Re-read the manifest file name in case it was rotated by a rewrite
|
|
5619
|
+
std::string content_manifest_name =
|
|
5620
|
+
DescriptorFileName(dbname_, manifest_file_number_);
|
|
5621
|
+
std::unique_ptr<FSSequentialFile> manifest_file;
|
|
5622
|
+
IOStatus content_io_s = fs_->NewSequentialFile(
|
|
5623
|
+
content_manifest_name, fs_->OptimizeForManifestRead(file_options_),
|
|
5624
|
+
&manifest_file, nullptr);
|
|
5625
|
+
if (!content_io_s.ok()) {
|
|
5626
|
+
// Surface I/O errors to the caller — users who call DB::Close() and
|
|
5627
|
+
// check the status should know about filesystem problems.
|
|
5628
|
+
s = content_io_s;
|
|
5629
|
+
ROCKS_LOG_ERROR(db_options_->info_log,
|
|
5630
|
+
"MANIFEST content verification on Close: "
|
|
5631
|
+
"could not open %s for reading: %s\n",
|
|
5632
|
+
content_manifest_name.c_str(),
|
|
5633
|
+
content_io_s.ToString().c_str());
|
|
5634
|
+
break;
|
|
5635
|
+
}
|
|
5636
|
+
std::unique_ptr<SequentialFileReader> manifest_file_reader(
|
|
5637
|
+
new SequentialFileReader(std::move(manifest_file),
|
|
5638
|
+
content_manifest_name,
|
|
5639
|
+
db_options_->log_readahead_size, io_tracer_,
|
|
5640
|
+
db_options_->listeners));
|
|
5641
|
+
LogReporter reporter;
|
|
5642
|
+
Status log_read_status;
|
|
5643
|
+
reporter.status = &log_read_status;
|
|
5644
|
+
log::Reader reader(nullptr, std::move(manifest_file_reader), &reporter,
|
|
5645
|
+
/*checksum=*/true, /*log_num=*/0);
|
|
5646
|
+
Slice record;
|
|
5647
|
+
std::string scratch;
|
|
5648
|
+
bool content_corrupt = false;
|
|
5649
|
+
while (reader.ReadRecord(&record, &scratch,
|
|
5650
|
+
WALRecoveryMode::kAbsoluteConsistency)) {
|
|
5651
|
+
VersionEdit edit;
|
|
5652
|
+
Status decode_s = edit.DecodeFrom(record);
|
|
5653
|
+
if (!decode_s.ok()) {
|
|
5654
|
+
content_corrupt = true;
|
|
5655
|
+
break;
|
|
5656
|
+
}
|
|
5657
|
+
}
|
|
5658
|
+
if (!content_corrupt && !log_read_status.ok()) {
|
|
5659
|
+
content_corrupt = true;
|
|
5660
|
+
}
|
|
5661
|
+
if (!content_corrupt) {
|
|
5662
|
+
// Manifest is healthy, no need to check again
|
|
5663
|
+
break;
|
|
5664
|
+
}
|
|
5665
|
+
IOStatus corrupt_io_s =
|
|
5666
|
+
IOStatus::Corruption("MANIFEST content validation failed");
|
|
5667
|
+
IOErrorInfo io_error_info(corrupt_io_s, FileOperationType::kVerify,
|
|
5668
|
+
content_manifest_name, /*length=*/0,
|
|
5669
|
+
/*offset=*/0);
|
|
5670
|
+
for (auto& listener : db_options_->listeners) {
|
|
5671
|
+
listener->OnIOError(io_error_info);
|
|
5672
|
+
}
|
|
5673
|
+
corrupt_io_s.PermitUncheckedError();
|
|
5674
|
+
io_error_info.io_status.PermitUncheckedError();
|
|
5675
|
+
if (content_check == 0) {
|
|
5676
|
+
// First check failed — rewrite and verify again
|
|
5677
|
+
ROCKS_LOG_ERROR(db_options_->info_log,
|
|
5678
|
+
"MANIFEST content verification on Close failed, "
|
|
5679
|
+
"filename %s, rewriting manifest\n",
|
|
5680
|
+
content_manifest_name.c_str());
|
|
5681
|
+
ColumnFamilyData* cfd = GetColumnFamilySet()->GetDefault();
|
|
5682
|
+
VersionEdit recovery_edit;
|
|
5683
|
+
assert(cfd);
|
|
5684
|
+
s = LogAndApply(cfd, ReadOptions(), WriteOptions(), &recovery_edit, mu,
|
|
5685
|
+
db_dir);
|
|
5686
|
+
} else {
|
|
5687
|
+
// Rewritten manifest is also corrupt — likely a recurring filesystem
|
|
5688
|
+
// issue. Surface it so DB::Close() callers can detect the problem.
|
|
5689
|
+
ROCKS_LOG_ERROR(db_options_->info_log,
|
|
5690
|
+
"MANIFEST content verification on Close failed again "
|
|
5691
|
+
"after rewrite, filename %s\n",
|
|
5692
|
+
content_manifest_name.c_str());
|
|
5693
|
+
s = Status::Corruption(
|
|
5694
|
+
"MANIFEST content verification failed after rewrite: " +
|
|
5695
|
+
content_manifest_name);
|
|
5696
|
+
}
|
|
5697
|
+
}
|
|
5698
|
+
}
|
|
5699
|
+
|
|
5545
5700
|
closed_ = true;
|
|
5546
5701
|
return s;
|
|
5547
5702
|
}
|
|
@@ -5558,9 +5713,10 @@ VersionSet::~VersionSet() {
|
|
|
5558
5713
|
// Using uncache_aggressiveness=0 overrides any previous marking to
|
|
5559
5714
|
// attempt to uncache the file's blocks (which after cleaning up
|
|
5560
5715
|
// column families could cause use-after-free)
|
|
5561
|
-
TableCache::ReleaseObsolete(
|
|
5562
|
-
|
|
5563
|
-
|
|
5716
|
+
TableCache::ReleaseObsolete(
|
|
5717
|
+
table_cache_, file.metadata->fd.GetNumber(),
|
|
5718
|
+
file.metadata->fd.pinned_reader.GetCacheHandle(),
|
|
5719
|
+
/*uncache_aggressiveness=*/0);
|
|
5564
5720
|
file.DeleteMetadata();
|
|
5565
5721
|
}
|
|
5566
5722
|
obsolete_files_.clear();
|
|
@@ -5624,6 +5780,8 @@ void VersionSet::UpdatedMutableDbOptions(
|
|
|
5624
5780
|
max_manifest_space_amp_pct_ = static_cast<unsigned>(
|
|
5625
5781
|
std::max(updated_options.max_manifest_space_amp_pct, 0));
|
|
5626
5782
|
manifest_preallocation_size_ = updated_options.manifest_preallocation_size;
|
|
5783
|
+
verify_manifest_content_on_close_ =
|
|
5784
|
+
updated_options.verify_manifest_content_on_close;
|
|
5627
5785
|
TuneMaxManifestFileSize();
|
|
5628
5786
|
}
|
|
5629
5787
|
|
|
@@ -5639,7 +5797,8 @@ void VersionSet::AppendVersion(ColumnFamilyData* column_family_data,
|
|
|
5639
5797
|
// compute new compaction score
|
|
5640
5798
|
v->storage_info()->ComputeCompactionScore(
|
|
5641
5799
|
column_family_data->ioptions(),
|
|
5642
|
-
column_family_data->GetLatestMutableCFOptions()
|
|
5800
|
+
column_family_data->GetLatestMutableCFOptions(),
|
|
5801
|
+
column_family_data->GetFullHistoryTsLow());
|
|
5643
5802
|
|
|
5644
5803
|
// Mark v finalized
|
|
5645
5804
|
v->storage_info_.SetFinalized();
|
|
@@ -6497,7 +6656,8 @@ Status VersionSet::Recover(
|
|
|
6497
6656
|
read_only, column_families, const_cast<VersionSet*>(this),
|
|
6498
6657
|
/*track_found_and_missing_files=*/false, no_error_if_files_missing,
|
|
6499
6658
|
io_tracer_, read_options, /*allow_incomplete_valid_version=*/false,
|
|
6500
|
-
EpochNumberRequirement::kMightMissing
|
|
6659
|
+
EpochNumberRequirement::kMightMissing,
|
|
6660
|
+
/*skip_load_table_files=*/db_options_->open_files_async);
|
|
6501
6661
|
handler.Iterate(reader, &log_read_status);
|
|
6502
6662
|
s = handler.status();
|
|
6503
6663
|
if (s.ok()) {
|
|
@@ -7102,7 +7262,6 @@ Status VersionSet::WriteCurrentStateToManifest(
|
|
|
7102
7262
|
|
|
7103
7263
|
for (const auto& f : level_files) {
|
|
7104
7264
|
assert(f);
|
|
7105
|
-
|
|
7106
7265
|
edit.AddFile(level, f->fd.GetNumber(), f->fd.GetPathId(),
|
|
7107
7266
|
f->fd.GetFileSize(), f->smallest, f->largest,
|
|
7108
7267
|
f->fd.smallest_seqno, f->fd.largest_seqno,
|
|
@@ -7111,7 +7270,8 @@ Status VersionSet::WriteCurrentStateToManifest(
|
|
|
7111
7270
|
f->file_creation_time, f->epoch_number, f->file_checksum,
|
|
7112
7271
|
f->file_checksum_func_name, f->unique_id,
|
|
7113
7272
|
f->compensated_range_deletion_size, f->tail_size,
|
|
7114
|
-
f->user_defined_timestamps_persisted
|
|
7273
|
+
f->user_defined_timestamps_persisted, f->min_timestamp,
|
|
7274
|
+
f->max_timestamp);
|
|
7115
7275
|
}
|
|
7116
7276
|
}
|
|
7117
7277
|
|
|
@@ -7550,7 +7710,6 @@ InternalIterator* VersionSet::MakeInputIterator(
|
|
|
7550
7710
|
list[num++] = new LevelIterator(
|
|
7551
7711
|
cfd->table_cache(), read_options, file_options_compactions,
|
|
7552
7712
|
cfd->internal_comparator(), flevel, c->mutable_cf_options(),
|
|
7553
|
-
/*should_sample=*/false,
|
|
7554
7713
|
/*no per level latency histogram=*/nullptr,
|
|
7555
7714
|
TableReaderCaller::kCompaction, /*skip_filters=*/false,
|
|
7556
7715
|
/*level=*/static_cast<int>(c->level(which)), range_del_agg,
|
|
@@ -7816,10 +7975,12 @@ Status VersionSet::VerifyFileMetadata(const ReadOptions& read_options,
|
|
|
7816
7975
|
InternalStats* internal_stats = cfd->internal_stats();
|
|
7817
7976
|
|
|
7818
7977
|
TableCache::TypedHandle* handle = nullptr;
|
|
7978
|
+
TableReader* table_reader = nullptr;
|
|
7819
7979
|
FileMetaData meta_copy = meta;
|
|
7820
7980
|
status = table_cache->FindTable(
|
|
7821
7981
|
read_options, file_opts, *icmp, meta_copy, &handle, cf_opts,
|
|
7822
|
-
/*no_io=*/false, internal_stats->GetFileReadHist(level),
|
|
7982
|
+
&table_reader, /*no_io=*/false, internal_stats->GetFileReadHist(level),
|
|
7983
|
+
false, level,
|
|
7823
7984
|
/*prefetch_index_and_filter_in_cache*/ false, max_sz_for_l0_meta_pin,
|
|
7824
7985
|
meta_copy.temperature);
|
|
7825
7986
|
if (handle) {
|
|
@@ -200,7 +200,8 @@ class VersionStorageInfo {
|
|
|
200
200
|
// REQUIRES: db_mutex held!!
|
|
201
201
|
// TODO find a better way to pass compaction_options_fifo.
|
|
202
202
|
void ComputeCompactionScore(const ImmutableOptions& immutable_options,
|
|
203
|
-
const MutableCFOptions& mutable_cf_options
|
|
203
|
+
const MutableCFOptions& mutable_cf_options,
|
|
204
|
+
const std::string& full_history_ts_low);
|
|
204
205
|
|
|
205
206
|
// Estimate est_comp_needed_bytes_
|
|
206
207
|
void EstimateCompactionBytesNeeded(
|
|
@@ -230,8 +231,15 @@ class VersionStorageInfo {
|
|
|
230
231
|
// oldest snapshot changes as that is when bottom-level files can become
|
|
231
232
|
// eligible for compaction.
|
|
232
233
|
//
|
|
234
|
+
// For columns with User Defined Timestamps (UDT), also checks that the
|
|
235
|
+
// file's largest timestamp is below full_history_ts_low before marking,
|
|
236
|
+
// since compaction can only collapse timestamp when it is below this
|
|
237
|
+
// threshold.
|
|
238
|
+
//
|
|
233
239
|
// REQUIRES: DB mutex held
|
|
234
|
-
void ComputeBottommostFilesMarkedForCompaction(
|
|
240
|
+
void ComputeBottommostFilesMarkedForCompaction(
|
|
241
|
+
bool allow_ingest_behind, const Comparator* ucmp,
|
|
242
|
+
const std::string& full_history_ts_low);
|
|
235
243
|
|
|
236
244
|
// This computes files_marked_for_forced_blob_gc_ and is called by
|
|
237
245
|
// ComputeCompactionScore()
|
|
@@ -248,7 +256,8 @@ class VersionStorageInfo {
|
|
|
248
256
|
// files marked for compaction.
|
|
249
257
|
// REQUIRES: DB mutex held
|
|
250
258
|
void UpdateOldestSnapshot(SequenceNumber oldest_snapshot_seqnum,
|
|
251
|
-
bool allow_ingest_behind
|
|
259
|
+
bool allow_ingest_behind, const Comparator* ucmp,
|
|
260
|
+
const std::string& full_history_ts_low);
|
|
252
261
|
|
|
253
262
|
int MaxInputLevel() const;
|
|
254
263
|
int MaxOutputLevel(bool allow_ingest_behind) const;
|
|
@@ -1425,6 +1434,29 @@ class VersionSet {
|
|
|
1425
1434
|
return last_allocated_sequence_.fetch_add(s, std::memory_order_seq_cst);
|
|
1426
1435
|
}
|
|
1427
1436
|
|
|
1437
|
+
// Sync last_sequence_ with last_allocated_sequence_. This should be called
|
|
1438
|
+
// during error recovery to ensure that any sequence numbers that were
|
|
1439
|
+
// allocated (written to WAL) but not yet published are accounted for when
|
|
1440
|
+
// creating new memtables/WALs. This prevents the "sequence number going
|
|
1441
|
+
// backwards" corruption on subsequent recovery.
|
|
1442
|
+
//
|
|
1443
|
+
// This is necessary because with two_write_queues=true, writes allocate
|
|
1444
|
+
// sequence numbers via FetchAddLastAllocatedSequence() before the write
|
|
1445
|
+
// is complete, but only publish via SetLastSequence() after success.
|
|
1446
|
+
// If an error occurs and recovery creates new memtables, SwitchMemtable
|
|
1447
|
+
// uses LastSequence() which may be lower than already-allocated sequences.
|
|
1448
|
+
//
|
|
1449
|
+
// REQUIRED: DB mutex is held and no concurrent writers are active (i.e.,
|
|
1450
|
+
// after WaitForBackgroundWork() in ResumeImpl).
|
|
1451
|
+
void SyncLastSequenceWithAllocated() {
|
|
1452
|
+
uint64_t alloc_seq =
|
|
1453
|
+
last_allocated_sequence_.load(std::memory_order_seq_cst);
|
|
1454
|
+
uint64_t last_seq = last_sequence_.load(std::memory_order_acquire);
|
|
1455
|
+
if (alloc_seq > last_seq) {
|
|
1456
|
+
last_sequence_.store(alloc_seq, std::memory_order_release);
|
|
1457
|
+
}
|
|
1458
|
+
}
|
|
1459
|
+
|
|
1428
1460
|
// Mark the specified file number as used.
|
|
1429
1461
|
// REQUIRED: this is only called during single-threaded recovery or repair.
|
|
1430
1462
|
void MarkFileNumberUsed(uint64_t number);
|
|
@@ -1706,6 +1738,9 @@ class VersionSet {
|
|
|
1706
1738
|
// The last sequence number of data committed to the descriptor (manifest
|
|
1707
1739
|
// file).
|
|
1708
1740
|
SequenceNumber descriptor_last_sequence_ = 0;
|
|
1741
|
+
// See write_prepared_txn.h for a more detailed description of how Write
|
|
1742
|
+
// Prepared transactions work, with concrete examples.
|
|
1743
|
+
//
|
|
1709
1744
|
// The last seq that is already allocated. It is applicable only when we have
|
|
1710
1745
|
// two write queues. In that case seq might or might not have appeared in
|
|
1711
1746
|
// memtable but it is expected to appear in the WAL.
|
|
@@ -1744,6 +1779,8 @@ class VersionSet {
|
|
|
1744
1779
|
unsigned max_manifest_space_amp_pct_;
|
|
1745
1780
|
// Saved copy from (Mutable)DBOptions
|
|
1746
1781
|
size_t manifest_preallocation_size_;
|
|
1782
|
+
// Saved copy from (Mutable)DBOptions
|
|
1783
|
+
bool verify_manifest_content_on_close_;
|
|
1747
1784
|
|
|
1748
1785
|
// Obsolete files, or during DB shutdown any files not referenced by what's
|
|
1749
1786
|
// left of the in-memory LSM state.
|
|
@@ -58,6 +58,11 @@ DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST)
|
|
|
58
58
|
|
|
59
59
|
if (get_context.sample()) {
|
|
60
60
|
sample_file_read_inc(f->file_metadata);
|
|
61
|
+
if (get_context.State() == GetContext::kNotFound ||
|
|
62
|
+
get_context.State() == GetContext::kMerge ||
|
|
63
|
+
get_context.State() == GetContext::kDeleted) {
|
|
64
|
+
sample_collapsible_entry_file_read_inc(f->file_metadata);
|
|
65
|
+
}
|
|
61
66
|
}
|
|
62
67
|
batch_size++;
|
|
63
68
|
num_index_read += get_context.get_context_stats_.num_index_read;
|