@nxtedition/rocksdb 8.2.0 → 8.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +3 -3
- package/deps/rocksdb/rocksdb/CMakeLists.txt +16 -52
- package/deps/rocksdb/rocksdb/Makefile +10 -5
- package/deps/rocksdb/rocksdb/TARGETS +8 -345
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +92 -0
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +32 -32
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +12 -9
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +6 -43
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +3 -13
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +8 -5
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +21 -47
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +3 -8
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +2 -1
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +1 -2
- package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +44 -7
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +13 -14
- package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +2 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +17 -8
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +40 -21
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +41 -42
- package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +5 -4
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +2 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +5 -3
- package/deps/rocksdb/rocksdb/db/builder.cc +7 -6
- package/deps/rocksdb/rocksdb/db/builder.h +2 -2
- package/deps/rocksdb/rocksdb/db/c.cc +76 -5
- package/deps/rocksdb/rocksdb/db/c_test.c +141 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +32 -0
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +3 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +5 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +12 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +21 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +8 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +3 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +77 -50
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +4 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +55 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +142 -56
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +1 -2
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +21 -20
- package/deps/rocksdb/rocksdb/db/convenience.cc +8 -6
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +5 -4
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +6 -3
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +260 -220
- package/deps/rocksdb/rocksdb/db/db_clip_test.cc +142 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +333 -27
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +7 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +189 -27
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +23 -10
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +134 -90
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +2 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +5 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +124 -16
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +10 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +7 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +15 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +11 -5
- package/deps/rocksdb/rocksdb/db/db_iter.cc +7 -8
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +54 -3
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +42 -0
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +116 -1
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +3 -2
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +3 -2
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +9 -8
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +142 -63
- package/deps/rocksdb/rocksdb/db/db_test.cc +28 -7
- package/deps/rocksdb/rocksdb/db/db_test2.cc +71 -131
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +18 -0
- package/deps/rocksdb/rocksdb/db/db_test_util.h +6 -0
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +10 -10
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +25 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +88 -0
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +67 -0
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +5 -0
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/experimental.cc +4 -2
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +86 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +15 -2
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +1 -2
- package/deps/rocksdb/rocksdb/db/flush_job.cc +21 -14
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +14 -7
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +31 -8
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +21 -19
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +42 -12
- package/deps/rocksdb/rocksdb/db/internal_stats.h +1 -0
- package/deps/rocksdb/rocksdb/db/kv_checksum.h +92 -6
- package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/log_format.h +8 -4
- package/deps/rocksdb/rocksdb/db/log_reader.cc +129 -51
- package/deps/rocksdb/rocksdb/db/log_reader.h +16 -0
- package/deps/rocksdb/rocksdb/db/log_test.cc +125 -4
- package/deps/rocksdb/rocksdb/db/log_writer.cc +32 -2
- package/deps/rocksdb/rocksdb/db/log_writer.h +16 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +17 -46
- package/deps/rocksdb/rocksdb/db/memtable.h +1 -1
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +8 -4
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -1
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +5 -4
- package/deps/rocksdb/rocksdb/db/repair.cc +38 -11
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +3 -3
- package/deps/rocksdb/rocksdb/db/table_cache.cc +68 -51
- package/deps/rocksdb/rocksdb/db/table_cache.h +20 -10
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -1
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +6 -3
- package/deps/rocksdb/rocksdb/db/version_builder.cc +9 -5
- package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +140 -120
- package/deps/rocksdb/rocksdb/db/version_edit.cc +14 -0
- package/deps/rocksdb/rocksdb/db/version_edit.h +12 -4
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +21 -13
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +26 -16
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +9 -9
- package/deps/rocksdb/rocksdb/db/version_set.cc +292 -96
- package/deps/rocksdb/rocksdb/db/version_set.h +53 -28
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -0
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +62 -22
- package/deps/rocksdb/rocksdb/db/version_util.h +5 -4
- package/deps/rocksdb/rocksdb/db/write_batch.cc +3 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +119 -27
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +123 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +7 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +34 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +13 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +43 -33
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +29 -17
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +6 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +85 -50
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +96 -54
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.cc +122 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +206 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +9 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +9 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +322 -92
- package/deps/rocksdb/rocksdb/env/env_posix.cc +12 -8
- package/deps/rocksdb/rocksdb/env/env_test.cc +31 -0
- package/deps/rocksdb/rocksdb/env/mock_env.cc +1 -1
- package/deps/rocksdb/rocksdb/env/unique_id_gen.h +14 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -1
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +5 -1
- package/deps/rocksdb/rocksdb/file/file_util.cc +3 -3
- package/deps/rocksdb/rocksdb/file/file_util.h +2 -0
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +89 -0
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +22 -7
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -2
- package/deps/rocksdb/rocksdb/file/readahead_raf.cc +1 -1
- package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +154 -74
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +27 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +107 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +19 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +7 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +137 -152
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +61 -26
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +30 -26
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +33 -16
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +87 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +0 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +9 -2
- package/deps/rocksdb/rocksdb/logging/env_logger.h +2 -0
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +78 -42
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +14 -9
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +1 -0
- package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -0
- package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +4 -9
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +19 -11
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +1 -1
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +211 -555
- package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +1 -1
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +36 -2
- package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +17 -7
- package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.h +10 -7
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +19 -18
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util.h +10 -2
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +14 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +35 -2
- package/deps/rocksdb/rocksdb/options/cf_options.h +5 -0
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +1 -1
- package/deps/rocksdb/rocksdb/options/options.cc +12 -53
- package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -0
- package/deps/rocksdb/rocksdb/options/options_parser.cc +11 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +32 -4
- package/deps/rocksdb/rocksdb/options/options_test.cc +89 -5
- package/deps/rocksdb/rocksdb/port/lang.h +27 -0
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +67 -24
- package/deps/rocksdb/rocksdb/src.mk +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -3
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +195 -35
- package/deps/rocksdb/rocksdb/table/block_based/block.h +197 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +71 -51
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +7 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +4 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +3 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +43 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +36 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +266 -166
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +44 -14
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +63 -56
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +8 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +10 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +14 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +918 -2
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -2
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +10 -9
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -8
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +2 -2
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +18 -23
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +8 -8
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +16 -32
- package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +7 -8
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +4 -5
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +3 -3
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +46 -53
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +12 -12
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +7 -9
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +26 -23
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +3 -0
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +3 -2
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +7 -1
- package/deps/rocksdb/rocksdb/table/block_fetcher.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +3 -2
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +5 -2
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +4 -2
- package/deps/rocksdb/rocksdb/table/format.cc +4 -4
- package/deps/rocksdb/rocksdb/table/format.h +1 -1
- package/deps/rocksdb/rocksdb/table/get_context.cc +1 -1
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +33 -22
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +4 -0
- package/deps/rocksdb/rocksdb/table/mock_table.cc +4 -2
- package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +1 -1
- package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +1 -1
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +18 -10
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +4 -3
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +10 -7
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +4 -2
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +11 -0
- package/deps/rocksdb/rocksdb/table/table_builder.h +14 -5
- package/deps/rocksdb/rocksdb/table/table_properties.cc +2 -0
- package/deps/rocksdb/rocksdb/table/table_reader.h +6 -3
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +1 -1
- package/deps/rocksdb/rocksdb/table/table_test.cc +291 -34
- package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +3 -1
- package/deps/rocksdb/rocksdb/test_util/testharness.h +5 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -2
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +33 -17
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +3 -1
- package/deps/rocksdb/rocksdb/util/bloom_impl.h +2 -2
- package/deps/rocksdb/rocksdb/util/compression.h +1 -1
- package/deps/rocksdb/rocksdb/util/crc32c.cc +24 -83
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +7 -9
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +4 -1
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +1 -1
- package/deps/rocksdb/rocksdb/util/gflags_compat.h +9 -10
- package/deps/rocksdb/rocksdb/util/math.h +12 -7
- package/deps/rocksdb/rocksdb/util/rate_limiter.cc +16 -18
- package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +46 -2
- package/deps/rocksdb/rocksdb/util/ribbon_test.cc +6 -6
- package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +12 -7
- package/deps/rocksdb/rocksdb/util/stop_watch.h +31 -13
- package/deps/rocksdb/rocksdb/util/thread_list_test.cc +2 -0
- package/deps/rocksdb/rocksdb/util/thread_operation.h +2 -1
- package/deps/rocksdb/rocksdb/util/udt_util.h +77 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +11 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +34 -1
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +15 -0
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +5 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +29 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +6 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +10 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +6 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +5 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +5 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
- /package/deps/rocksdb/rocksdb/memory/{memory_allocator.h → memory_allocator_impl.h} +0 -0
- /package/deps/rocksdb/rocksdb/monitoring/{statistics.h → statistics_impl.h} +0 -0
- /package/deps/rocksdb/rocksdb/table/block_based/{flush_block_policy.h → flush_block_policy_impl.h} +0 -0
- /package/deps/rocksdb/rocksdb/util/{rate_limiter.h → rate_limiter_impl.h} +0 -0
- /package/deps/rocksdb/rocksdb/utilities/agg_merge/{agg_merge.h → agg_merge_impl.h} +0 -0
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
#include <string>
|
|
15
15
|
#include <vector>
|
|
16
16
|
|
|
17
|
+
#include "db/kv_checksum.h"
|
|
17
18
|
#include "db/pinned_iterators_manager.h"
|
|
18
19
|
#include "port/malloc.h"
|
|
19
20
|
#include "rocksdb/advanced_cache.h"
|
|
@@ -240,6 +241,34 @@ class Block {
|
|
|
240
241
|
// For TypedCacheInterface
|
|
241
242
|
const Slice& ContentSlice() const { return contents_.data; }
|
|
242
243
|
|
|
244
|
+
// Initializes per key-value checksum protection.
|
|
245
|
+
// After this method is called, each DataBlockIterator returned
|
|
246
|
+
// by NewDataIterator will verify the per key-value checksum for any key it reads.
|
|
247
|
+
void InitializeDataBlockProtectionInfo(uint8_t protection_bytes_per_key,
|
|
248
|
+
const Comparator* raw_ucmp);
|
|
249
|
+
|
|
250
|
+
// Initializes per key-value checksum protection.
|
|
251
|
+
// After this method is called, each IndexBlockIterator returned
|
|
252
|
+
// by NewIndexIterator will verify the per key-value checksum for any key it reads.
|
|
253
|
+
// value_is_full and index_has_first_key are needed to be able to parse
|
|
254
|
+
// the index block content and construct checksums.
|
|
255
|
+
void InitializeIndexBlockProtectionInfo(uint8_t protection_bytes_per_key,
|
|
256
|
+
const Comparator* raw_ucmp,
|
|
257
|
+
bool value_is_full,
|
|
258
|
+
bool index_has_first_key);
|
|
259
|
+
|
|
260
|
+
// Initializes per key-value checksum protection.
|
|
261
|
+
// After this method is called, each MetaBlockIter returned
|
|
262
|
+
// by NewMetaIterator will verify the per key-value checksum for any key it reads.
|
|
263
|
+
void InitializeMetaIndexBlockProtectionInfo(uint8_t protection_bytes_per_key);
|
|
264
|
+
|
|
265
|
+
static void GenerateKVChecksum(char* checksum_ptr, uint8_t checksum_len,
|
|
266
|
+
const Slice& key, const Slice& value) {
|
|
267
|
+
ProtectionInfo64().ProtectKV(key, value).Encode(checksum_len, checksum_ptr);
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
const char* TEST_GetKVChecksum() const { return kv_checksum_; }
|
|
271
|
+
|
|
243
272
|
private:
|
|
244
273
|
BlockContents contents_;
|
|
245
274
|
const char* data_; // contents_.data.data()
|
|
@@ -247,6 +276,11 @@ class Block {
|
|
|
247
276
|
uint32_t restart_offset_; // Offset in data_ of restart array
|
|
248
277
|
uint32_t num_restarts_;
|
|
249
278
|
std::unique_ptr<BlockReadAmpBitmap> read_amp_bitmap_;
|
|
279
|
+
char* kv_checksum_{nullptr};
|
|
280
|
+
uint32_t checksum_size_{0};
|
|
281
|
+
// Used by block iterators to calculate current key index within a block
|
|
282
|
+
uint32_t block_restart_interval_{0};
|
|
283
|
+
uint8_t protection_bytes_per_key_{0};
|
|
250
284
|
DataBlockHashIndex data_block_hash_index_;
|
|
251
285
|
};
|
|
252
286
|
|
|
@@ -269,6 +303,14 @@ class Block {
|
|
|
269
303
|
// `Seek()` logic would be implemented by subclasses in `SeekImpl()`. These
|
|
270
304
|
// "Impl" functions are responsible for positioning `raw_key_` but not
|
|
271
305
|
// invoking `UpdateKey()`.
|
|
306
|
+
//
|
|
307
|
+
// Per key-value checksum is enabled if relevant states are passed in during
|
|
308
|
+
// `InitializeBase()`. The checksum verification is done in each call to
|
|
309
|
+
// UpdateKey() for the current key. Each subclass is responsible for keeping
|
|
310
|
+
// track of cur_entry_idx_, the index of the current key within the block.
|
|
311
|
+
// BlockIter uses this index to get the corresponding checksum for current key.
|
|
312
|
+
// Additional checksum verification may be done in subclasses if they read keys
|
|
313
|
+
// other than the key being processed in UpdateKey().
|
|
272
314
|
template <class TValue>
|
|
273
315
|
class BlockIter : public InternalIteratorBase<TValue> {
|
|
274
316
|
public:
|
|
@@ -286,9 +328,16 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
|
|
286
328
|
Cleanable::Reset();
|
|
287
329
|
}
|
|
288
330
|
|
|
289
|
-
bool Valid() const override {
|
|
331
|
+
bool Valid() const override {
|
|
332
|
+
// When status_ is not ok, iter should be invalid.
|
|
333
|
+
assert(status_.ok() || current_ >= restarts_);
|
|
334
|
+
return current_ < restarts_;
|
|
335
|
+
}
|
|
290
336
|
|
|
291
337
|
virtual void SeekToFirst() override final {
|
|
338
|
+
#ifndef NDEBUG
|
|
339
|
+
if (TEST_Corrupt_Callback("BlockIter::SeekToFirst")) return;
|
|
340
|
+
#endif
|
|
292
341
|
SeekToFirstImpl();
|
|
293
342
|
UpdateKey();
|
|
294
343
|
}
|
|
@@ -325,6 +374,7 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
|
|
325
374
|
}
|
|
326
375
|
|
|
327
376
|
Status status() const override { return status_; }
|
|
377
|
+
|
|
328
378
|
Slice key() const override {
|
|
329
379
|
assert(Valid());
|
|
330
380
|
return key_;
|
|
@@ -337,10 +387,22 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
|
|
337
387
|
(pinned_iters_mgr_ && !pinned_iters_mgr_->PinningEnabled()));
|
|
338
388
|
status_.PermitUncheckedError();
|
|
339
389
|
}
|
|
390
|
+
|
|
340
391
|
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
|
|
341
392
|
pinned_iters_mgr_ = pinned_iters_mgr;
|
|
342
393
|
}
|
|
394
|
+
|
|
343
395
|
PinnedIteratorsManager* pinned_iters_mgr_ = nullptr;
|
|
396
|
+
|
|
397
|
+
bool TEST_Corrupt_Callback(const std::string& sync_point) {
|
|
398
|
+
bool corrupt = false;
|
|
399
|
+
TEST_SYNC_POINT_CALLBACK(sync_point, static_cast<void*>(&corrupt));
|
|
400
|
+
|
|
401
|
+
if (corrupt) {
|
|
402
|
+
CorruptionError();
|
|
403
|
+
}
|
|
404
|
+
return corrupt;
|
|
405
|
+
}
|
|
344
406
|
#endif
|
|
345
407
|
|
|
346
408
|
bool IsKeyPinned() const override {
|
|
@@ -377,27 +439,74 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
|
|
377
439
|
Status status_;
|
|
378
440
|
// Key to be exposed to users.
|
|
379
441
|
Slice key_;
|
|
442
|
+
SequenceNumber global_seqno_;
|
|
443
|
+
|
|
444
|
+
// Per key-value checksum related states
|
|
445
|
+
const char* kv_checksum_;
|
|
446
|
+
int32_t cur_entry_idx_;
|
|
447
|
+
uint32_t block_restart_interval_;
|
|
448
|
+
uint8_t protection_bytes_per_key_;
|
|
449
|
+
|
|
380
450
|
bool key_pinned_;
|
|
381
451
|
// Whether the block data is guaranteed to outlive this iterator, and
|
|
382
452
|
// as long as the cleanup functions are transferred to another class,
|
|
383
453
|
// e.g. PinnableSlice, the pointer to the bytes will still be valid.
|
|
384
454
|
bool block_contents_pinned_;
|
|
385
|
-
SequenceNumber global_seqno_;
|
|
386
455
|
|
|
387
456
|
virtual void SeekToFirstImpl() = 0;
|
|
388
457
|
virtual void SeekToLastImpl() = 0;
|
|
389
458
|
virtual void SeekImpl(const Slice& target) = 0;
|
|
390
459
|
virtual void SeekForPrevImpl(const Slice& target) = 0;
|
|
391
460
|
virtual void NextImpl() = 0;
|
|
392
|
-
|
|
393
461
|
virtual void PrevImpl() = 0;
|
|
394
462
|
|
|
463
|
+
// Returns the restart interval of this block.
|
|
464
|
+
// Returns 0 if num_restarts_ <= 1 or if the BlockIter is not initialized.
|
|
465
|
+
virtual uint32_t GetRestartInterval() {
|
|
466
|
+
if (num_restarts_ <= 1 || data_ == nullptr) {
|
|
467
|
+
return 0;
|
|
468
|
+
}
|
|
469
|
+
SeekToFirstImpl();
|
|
470
|
+
uint32_t end_index = GetRestartPoint(1);
|
|
471
|
+
uint32_t count = 1;
|
|
472
|
+
while (NextEntryOffset() < end_index && status_.ok()) {
|
|
473
|
+
assert(Valid());
|
|
474
|
+
NextImpl();
|
|
475
|
+
++count;
|
|
476
|
+
}
|
|
477
|
+
return count;
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
// Returns the number of keys in this block.
|
|
481
|
+
virtual uint32_t NumberOfKeys(uint32_t block_restart_interval) {
|
|
482
|
+
if (num_restarts_ == 0 || data_ == nullptr) {
|
|
483
|
+
return 0;
|
|
484
|
+
}
|
|
485
|
+
uint32_t count = (num_restarts_ - 1) * block_restart_interval;
|
|
486
|
+
// Add number of keys from the last restart interval
|
|
487
|
+
SeekToRestartPoint(num_restarts_ - 1);
|
|
488
|
+
while (NextEntryOffset() < restarts_ && status_.ok()) {
|
|
489
|
+
NextImpl();
|
|
490
|
+
++count;
|
|
491
|
+
}
|
|
492
|
+
return count;
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
// Stores whether the current key shares bytes with the previous key in
|
|
496
|
+
// *is_shared.
|
|
497
|
+
// Sets raw_key_, value_ to the current parsed key and value.
|
|
498
|
+
// Sets restart_index_ to point to the restart interval that contains
|
|
499
|
+
// the current key.
|
|
395
500
|
template <typename DecodeEntryFunc>
|
|
396
501
|
inline bool ParseNextKey(bool* is_shared);
|
|
397
502
|
|
|
503
|
+
// protection_bytes_per_key, kv_checksum, and block_restart_interval
|
|
504
|
+
// are needed only for per kv checksum verification.
|
|
398
505
|
void InitializeBase(const Comparator* raw_ucmp, const char* data,
|
|
399
506
|
uint32_t restarts, uint32_t num_restarts,
|
|
400
|
-
SequenceNumber global_seqno, bool block_contents_pinned
|
|
507
|
+
SequenceNumber global_seqno, bool block_contents_pinned,
|
|
508
|
+
uint8_t protection_bytes_per_key, const char* kv_checksum,
|
|
509
|
+
uint32_t block_restart_interval) {
|
|
401
510
|
assert(data_ == nullptr); // Ensure it is called only once
|
|
402
511
|
assert(num_restarts > 0); // Ensure the param is valid
|
|
403
512
|
|
|
@@ -410,11 +519,41 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
|
|
410
519
|
global_seqno_ = global_seqno;
|
|
411
520
|
block_contents_pinned_ = block_contents_pinned;
|
|
412
521
|
cache_handle_ = nullptr;
|
|
522
|
+
cur_entry_idx_ = -1;
|
|
523
|
+
protection_bytes_per_key_ = protection_bytes_per_key;
|
|
524
|
+
kv_checksum_ = kv_checksum;
|
|
525
|
+
block_restart_interval_ = block_restart_interval;
|
|
526
|
+
// Checksum related states are either all 0/nullptr or all non-zero.
|
|
527
|
+
// One exception is when num_restarts == 1: block_restart_interval can be 0
|
|
528
|
+
// since we are not able to compute it.
|
|
529
|
+
assert((protection_bytes_per_key == 0 && kv_checksum == nullptr) ||
|
|
530
|
+
(protection_bytes_per_key > 0 && kv_checksum != nullptr &&
|
|
531
|
+
(block_restart_interval > 0 || num_restarts == 1)));
|
|
532
|
+
}
|
|
533
|
+
|
|
534
|
+
void CorruptionError(const std::string& error_msg = "bad entry in block") {
|
|
535
|
+
current_ = restarts_;
|
|
536
|
+
restart_index_ = num_restarts_;
|
|
537
|
+
status_ = Status::Corruption(error_msg);
|
|
538
|
+
raw_key_.Clear();
|
|
539
|
+
value_.clear();
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
void PerKVChecksumCorruptionError() {
|
|
543
|
+
std::string error_msg{
|
|
544
|
+
"Corrupted block entry: per key-value checksum verification "
|
|
545
|
+
"failed."};
|
|
546
|
+
error_msg.append(" Offset: " + std::to_string(current_) + ".");
|
|
547
|
+
error_msg.append(" Entry index: " + std::to_string(cur_entry_idx_) + ".");
|
|
548
|
+
CorruptionError(error_msg);
|
|
413
549
|
}
|
|
414
550
|
|
|
415
551
|
// Must be called every time a key is found that needs to be returned to user,
|
|
416
552
|
// and may be called when no key is found (as a no-op). Updates `key_`,
|
|
417
553
|
// `key_buf_`, and `key_pinned_` with info about the found key.
|
|
554
|
+
// Per key-value checksum verification is done if available for the key to be
|
|
555
|
+
// returned. Iterator is invalidated with corruption status if checksum
|
|
556
|
+
// verification fails.
|
|
418
557
|
void UpdateKey() {
|
|
419
558
|
key_buf_.Clear();
|
|
420
559
|
if (!Valid()) {
|
|
@@ -433,6 +572,19 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
|
|
433
572
|
key_ = key_buf_.GetInternalKey();
|
|
434
573
|
key_pinned_ = false;
|
|
435
574
|
}
|
|
575
|
+
TEST_SYNC_POINT_CALLBACK("BlockIter::UpdateKey::value",
|
|
576
|
+
(void*)value_.data());
|
|
577
|
+
TEST_SYNC_POINT_CALLBACK("Block::VerifyChecksum::checksum_len",
|
|
578
|
+
&protection_bytes_per_key_);
|
|
579
|
+
if (protection_bytes_per_key_ > 0) {
|
|
580
|
+
if (!ProtectionInfo64()
|
|
581
|
+
.ProtectKV(raw_key_.GetKey(), value_)
|
|
582
|
+
.Verify(
|
|
583
|
+
protection_bytes_per_key_,
|
|
584
|
+
kv_checksum_ + protection_bytes_per_key_ * cur_entry_idx_)) {
|
|
585
|
+
PerKVChecksumCorruptionError();
|
|
586
|
+
}
|
|
587
|
+
}
|
|
436
588
|
}
|
|
437
589
|
|
|
438
590
|
// Returns the result of `Comparator::Compare()`, where the appropriate
|
|
@@ -464,7 +616,7 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
|
|
464
616
|
return static_cast<uint32_t>((value_.data() + value_.size()) - data_);
|
|
465
617
|
}
|
|
466
618
|
|
|
467
|
-
uint32_t GetRestartPoint(uint32_t index) {
|
|
619
|
+
uint32_t GetRestartPoint(uint32_t index) const {
|
|
468
620
|
assert(index < num_restarts_);
|
|
469
621
|
return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t));
|
|
470
622
|
}
|
|
@@ -479,13 +631,20 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
|
|
479
631
|
value_ = Slice(data_ + offset, 0);
|
|
480
632
|
}
|
|
481
633
|
|
|
482
|
-
void CorruptionError();
|
|
483
|
-
|
|
484
634
|
protected:
|
|
485
635
|
template <typename DecodeKeyFunc>
|
|
486
636
|
inline bool BinarySeek(const Slice& target, uint32_t* index,
|
|
487
637
|
bool* is_index_key_result);
|
|
488
638
|
|
|
639
|
+
// Find the first key in restart interval `index` that is >= `target`.
|
|
640
|
+
// If there is no such key, iterator is positioned at the first key in
|
|
641
|
+
// restart interval `index + 1`.
|
|
642
|
+
// If is_index_key_result is true, it positions the iterator at the first key
|
|
643
|
+
// in this restart interval.
|
|
644
|
+
// Per key-value checksum verification is done for all keys scanned
|
|
645
|
+
// up to but not including the last key (the key that current_ points to
|
|
646
|
+
// when this function returns). This key's checksum is verified in
|
|
647
|
+
// UpdateKey().
|
|
489
648
|
void FindKeyAfterBinarySeek(const Slice& target, uint32_t index,
|
|
490
649
|
bool is_index_key_result);
|
|
491
650
|
};
|
|
@@ -494,22 +653,17 @@ class DataBlockIter final : public BlockIter<Slice> {
|
|
|
494
653
|
public:
|
|
495
654
|
DataBlockIter()
|
|
496
655
|
: BlockIter(), read_amp_bitmap_(nullptr), last_bitmap_offset_(0) {}
|
|
497
|
-
DataBlockIter(const Comparator* raw_ucmp, const char* data, uint32_t restarts,
|
|
498
|
-
uint32_t num_restarts, SequenceNumber global_seqno,
|
|
499
|
-
BlockReadAmpBitmap* read_amp_bitmap, bool block_contents_pinned,
|
|
500
|
-
DataBlockHashIndex* data_block_hash_index)
|
|
501
|
-
: DataBlockIter() {
|
|
502
|
-
Initialize(raw_ucmp, data, restarts, num_restarts, global_seqno,
|
|
503
|
-
read_amp_bitmap, block_contents_pinned, data_block_hash_index);
|
|
504
|
-
}
|
|
505
656
|
void Initialize(const Comparator* raw_ucmp, const char* data,
|
|
506
657
|
uint32_t restarts, uint32_t num_restarts,
|
|
507
658
|
SequenceNumber global_seqno,
|
|
508
659
|
BlockReadAmpBitmap* read_amp_bitmap,
|
|
509
660
|
bool block_contents_pinned,
|
|
510
|
-
DataBlockHashIndex* data_block_hash_index
|
|
661
|
+
DataBlockHashIndex* data_block_hash_index,
|
|
662
|
+
uint8_t protection_bytes_per_key, const char* kv_checksum,
|
|
663
|
+
uint32_t block_restart_interval) {
|
|
511
664
|
InitializeBase(raw_ucmp, data, restarts, num_restarts, global_seqno,
|
|
512
|
-
block_contents_pinned
|
|
665
|
+
block_contents_pinned, protection_bytes_per_key, kv_checksum,
|
|
666
|
+
block_restart_interval);
|
|
513
667
|
raw_key_.SetIsUserKey(false);
|
|
514
668
|
read_amp_bitmap_ = read_amp_bitmap;
|
|
515
669
|
last_bitmap_offset_ = current_ + 1;
|
|
@@ -527,7 +681,11 @@ class DataBlockIter final : public BlockIter<Slice> {
|
|
|
527
681
|
return value_;
|
|
528
682
|
}
|
|
529
683
|
|
|
684
|
+
// Returns whether `target` may exist.
|
|
530
685
|
inline bool SeekForGet(const Slice& target) {
|
|
686
|
+
#ifndef NDEBUG
|
|
687
|
+
if (TEST_Corrupt_Callback("DataBlockIter::SeekForGet")) return true;
|
|
688
|
+
#endif
|
|
531
689
|
if (!data_block_hash_index_) {
|
|
532
690
|
SeekImpl(target);
|
|
533
691
|
UpdateKey();
|
|
@@ -599,11 +757,14 @@ class MetaBlockIter final : public BlockIter<Slice> {
|
|
|
599
757
|
public:
|
|
600
758
|
MetaBlockIter() : BlockIter() { raw_key_.SetIsUserKey(true); }
|
|
601
759
|
void Initialize(const char* data, uint32_t restarts, uint32_t num_restarts,
|
|
602
|
-
bool block_contents_pinned
|
|
760
|
+
bool block_contents_pinned, uint8_t protection_bytes_per_key,
|
|
761
|
+
const char* kv_checksum, uint32_t block_restart_interval) {
|
|
603
762
|
// Initializes the iterator with a BytewiseComparator and
|
|
604
763
|
// the raw key being a user key.
|
|
605
764
|
InitializeBase(BytewiseComparator(), data, restarts, num_restarts,
|
|
606
|
-
kDisableGlobalSequenceNumber, block_contents_pinned
|
|
765
|
+
kDisableGlobalSequenceNumber, block_contents_pinned,
|
|
766
|
+
protection_bytes_per_key, kv_checksum,
|
|
767
|
+
block_restart_interval);
|
|
607
768
|
raw_key_.SetIsUserKey(true);
|
|
608
769
|
}
|
|
609
770
|
|
|
@@ -613,12 +774,17 @@ class MetaBlockIter final : public BlockIter<Slice> {
|
|
|
613
774
|
}
|
|
614
775
|
|
|
615
776
|
protected:
|
|
777
|
+
friend Block;
|
|
616
778
|
void SeekToFirstImpl() override;
|
|
617
779
|
void SeekToLastImpl() override;
|
|
618
780
|
void SeekImpl(const Slice& target) override;
|
|
619
781
|
void SeekForPrevImpl(const Slice& target) override;
|
|
620
782
|
void NextImpl() override;
|
|
621
783
|
void PrevImpl() override;
|
|
784
|
+
// Meta index block's restart interval is always 1. See
|
|
785
|
+
// MetaIndexBuilder::MetaIndexBuilder() for hard-coded restart interval.
|
|
786
|
+
uint32_t GetRestartInterval() override { return 1; }
|
|
787
|
+
uint32_t NumberOfKeys(uint32_t) override { return num_restarts_; }
|
|
622
788
|
};
|
|
623
789
|
|
|
624
790
|
class IndexBlockIter final : public BlockIter<IndexValue> {
|
|
@@ -633,9 +799,13 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
|
|
|
633
799
|
uint32_t restarts, uint32_t num_restarts,
|
|
634
800
|
SequenceNumber global_seqno, BlockPrefixIndex* prefix_index,
|
|
635
801
|
bool have_first_key, bool key_includes_seq,
|
|
636
|
-
bool value_is_full, bool block_contents_pinned
|
|
802
|
+
bool value_is_full, bool block_contents_pinned,
|
|
803
|
+
uint8_t protection_bytes_per_key, const char* kv_checksum,
|
|
804
|
+
uint32_t block_restart_interval) {
|
|
637
805
|
InitializeBase(raw_ucmp, data, restarts, num_restarts,
|
|
638
|
-
kDisableGlobalSequenceNumber, block_contents_pinned
|
|
806
|
+
kDisableGlobalSequenceNumber, block_contents_pinned,
|
|
807
|
+
protection_bytes_per_key, kv_checksum,
|
|
808
|
+
block_restart_interval);
|
|
639
809
|
raw_key_.SetIsUserKey(!key_includes_seq);
|
|
640
810
|
prefix_index_ = prefix_index;
|
|
641
811
|
value_delta_encoded_ = !value_is_full;
|
|
@@ -666,11 +836,17 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
|
|
|
666
836
|
}
|
|
667
837
|
}
|
|
668
838
|
|
|
839
|
+
Slice raw_value() const {
|
|
840
|
+
assert(Valid());
|
|
841
|
+
return value_;
|
|
842
|
+
}
|
|
843
|
+
|
|
669
844
|
bool IsValuePinned() const override {
|
|
670
845
|
return global_seqno_state_ != nullptr ? false : BlockIter::IsValuePinned();
|
|
671
846
|
}
|
|
672
847
|
|
|
673
848
|
protected:
|
|
849
|
+
friend Block;
|
|
674
850
|
// IndexBlockIter follows a different contract for prefix iterator
|
|
675
851
|
// from data iterators.
|
|
676
852
|
// If prefix of the seek key `target` exists in the file, it must
|
|
@@ -692,11 +868,8 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
|
|
|
692
868
|
}
|
|
693
869
|
|
|
694
870
|
void PrevImpl() override;
|
|
695
|
-
|
|
696
871
|
void NextImpl() override;
|
|
697
|
-
|
|
698
872
|
void SeekToFirstImpl() override;
|
|
699
|
-
|
|
700
873
|
void SeekToLastImpl() override;
|
|
701
874
|
|
|
702
875
|
private:
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
#include "db/dbformat.h"
|
|
30
30
|
#include "index_builder.h"
|
|
31
31
|
#include "logging/logging.h"
|
|
32
|
-
#include "memory/
|
|
32
|
+
#include "memory/memory_allocator_impl.h"
|
|
33
33
|
#include "rocksdb/cache.h"
|
|
34
34
|
#include "rocksdb/comparator.h"
|
|
35
35
|
#include "rocksdb/env.h"
|
|
@@ -104,9 +104,12 @@ FilterBlockBuilder* CreateFilterBlockBuilder(
|
|
|
104
104
|
}
|
|
105
105
|
}
|
|
106
106
|
|
|
107
|
-
bool GoodCompressionRatio(size_t compressed_size, size_t uncomp_size
|
|
108
|
-
|
|
109
|
-
|
|
107
|
+
bool GoodCompressionRatio(size_t compressed_size, size_t uncomp_size,
|
|
108
|
+
int max_compressed_bytes_per_kb) {
|
|
109
|
+
// For efficiency, avoid floating point and division
|
|
110
|
+
return compressed_size <=
|
|
111
|
+
(static_cast<uint64_t>(max_compressed_bytes_per_kb) * uncomp_size) >>
|
|
112
|
+
10;
|
|
110
113
|
}
|
|
111
114
|
|
|
112
115
|
} // namespace
|
|
@@ -114,7 +117,7 @@ bool GoodCompressionRatio(size_t compressed_size, size_t uncomp_size) {
|
|
|
114
117
|
// format_version is the block format as defined in include/rocksdb/table.h
|
|
115
118
|
Slice CompressBlock(const Slice& uncompressed_data, const CompressionInfo& info,
|
|
116
119
|
CompressionType* type, uint32_t format_version,
|
|
117
|
-
bool
|
|
120
|
+
bool allow_sample, std::string* compressed_output,
|
|
118
121
|
std::string* sampled_output_fast,
|
|
119
122
|
std::string* sampled_output_slow) {
|
|
120
123
|
assert(type);
|
|
@@ -126,7 +129,7 @@ Slice CompressBlock(const Slice& uncompressed_data, const CompressionInfo& info,
|
|
|
126
129
|
// The users can use these stats to decide if it is worthwhile
|
|
127
130
|
// enabling compression and they also get a hint about which
|
|
128
131
|
// compression algorithm will be beneficial.
|
|
129
|
-
if (
|
|
132
|
+
if (allow_sample && info.SampleForCompression() &&
|
|
130
133
|
Random::GetTLSInstance()->OneIn(
|
|
131
134
|
static_cast<int>(info.SampleForCompression()))) {
|
|
132
135
|
// Sampling with a fast compression algorithm
|
|
@@ -159,7 +162,8 @@ Slice CompressBlock(const Slice& uncompressed_data, const CompressionInfo& info,
|
|
|
159
162
|
}
|
|
160
163
|
}
|
|
161
164
|
|
|
162
|
-
|
|
165
|
+
int max_compressed_bytes_per_kb = info.options().max_compressed_bytes_per_kb;
|
|
166
|
+
if (info.type() == kNoCompression || max_compressed_bytes_per_kb <= 0) {
|
|
163
167
|
*type = kNoCompression;
|
|
164
168
|
return uncompressed_data;
|
|
165
169
|
}
|
|
@@ -175,8 +179,8 @@ Slice CompressBlock(const Slice& uncompressed_data, const CompressionInfo& info,
|
|
|
175
179
|
|
|
176
180
|
// Check the compression ratio; if it's not good enough, just fall back to
|
|
177
181
|
// uncompressed
|
|
178
|
-
if (!GoodCompressionRatio(compressed_output->size(),
|
|
179
|
-
|
|
182
|
+
if (!GoodCompressionRatio(compressed_output->size(), uncompressed_data.size(),
|
|
183
|
+
max_compressed_bytes_per_kb)) {
|
|
180
184
|
*type = kNoCompression;
|
|
181
185
|
return uncompressed_data;
|
|
182
186
|
}
|
|
@@ -337,6 +341,10 @@ struct BlockBasedTableBuilder::Rep {
|
|
|
337
341
|
std::unique_ptr<ParallelCompressionRep> pc_rep;
|
|
338
342
|
BlockCreateContext create_context;
|
|
339
343
|
|
|
344
|
+
// The size of the "tail" part of a SST file. "Tail" refers to
|
|
345
|
+
// all blocks after data blocks till the end of the SST file.
|
|
346
|
+
uint64_t tail_size;
|
|
347
|
+
|
|
340
348
|
uint64_t get_offset() { return offset.load(std::memory_order_relaxed); }
|
|
341
349
|
void set_offset(uint64_t o) { offset.store(o, std::memory_order_relaxed); }
|
|
342
350
|
|
|
@@ -446,7 +454,13 @@ struct BlockBasedTableBuilder::Rep {
|
|
|
446
454
|
table_options, data_block)),
|
|
447
455
|
create_context(&table_options, ioptions.stats,
|
|
448
456
|
compression_type == kZSTD ||
|
|
449
|
-
compression_type == kZSTDNotFinalCompression
|
|
457
|
+
compression_type == kZSTDNotFinalCompression,
|
|
458
|
+
tbo.moptions.block_protection_bytes_per_key,
|
|
459
|
+
tbo.internal_comparator.user_comparator(),
|
|
460
|
+
!use_delta_encoding_for_index_values,
|
|
461
|
+
table_opt.index_type ==
|
|
462
|
+
BlockBasedTableOptions::kBinarySearchWithFirstKey),
|
|
463
|
+
tail_size(0),
|
|
450
464
|
status_ok(true),
|
|
451
465
|
io_status_ok(true) {
|
|
452
466
|
if (tbo.target_file_size == 0) {
|
|
@@ -1108,25 +1122,17 @@ void BlockBasedTableBuilder::CompressAndVerifyBlock(
|
|
|
1108
1122
|
const CompressionContext& compression_ctx, UncompressionContext* verify_ctx,
|
|
1109
1123
|
std::string* compressed_output, Slice* block_contents,
|
|
1110
1124
|
CompressionType* type, Status* out_status) {
|
|
1111
|
-
// File format contains a sequence of blocks where each block has:
|
|
1112
|
-
// block_data: uint8[n]
|
|
1113
|
-
// type: uint8
|
|
1114
|
-
// crc: uint32
|
|
1115
1125
|
Rep* r = rep_;
|
|
1116
1126
|
bool is_status_ok = ok();
|
|
1117
1127
|
if (!r->IsParallelCompressionEnabled()) {
|
|
1118
1128
|
assert(is_status_ok);
|
|
1119
1129
|
}
|
|
1120
1130
|
|
|
1121
|
-
*type = r->compression_type;
|
|
1122
|
-
uint64_t sample_for_compression = r->sample_for_compression;
|
|
1123
|
-
bool abort_compression = false;
|
|
1124
|
-
|
|
1125
|
-
StopWatchNano timer(
|
|
1126
|
-
r->ioptions.clock,
|
|
1127
|
-
ShouldReportDetailedTime(r->ioptions.env, r->ioptions.stats));
|
|
1128
|
-
|
|
1129
1131
|
if (is_status_ok && uncompressed_block_data.size() < kCompressionSizeLimit) {
|
|
1132
|
+
StopWatchNano timer(
|
|
1133
|
+
r->ioptions.clock,
|
|
1134
|
+
ShouldReportDetailedTime(r->ioptions.env, r->ioptions.stats));
|
|
1135
|
+
|
|
1130
1136
|
if (is_data_block) {
|
|
1131
1137
|
r->compressible_input_data_bytes.fetch_add(uncompressed_block_data.size(),
|
|
1132
1138
|
std::memory_order_relaxed);
|
|
@@ -1139,14 +1145,14 @@ void BlockBasedTableBuilder::CompressAndVerifyBlock(
|
|
|
1139
1145
|
}
|
|
1140
1146
|
assert(compression_dict != nullptr);
|
|
1141
1147
|
CompressionInfo compression_info(r->compression_opts, compression_ctx,
|
|
1142
|
-
*compression_dict,
|
|
1143
|
-
sample_for_compression);
|
|
1148
|
+
*compression_dict, r->compression_type,
|
|
1149
|
+
r->sample_for_compression);
|
|
1144
1150
|
|
|
1145
1151
|
std::string sampled_output_fast;
|
|
1146
1152
|
std::string sampled_output_slow;
|
|
1147
1153
|
*block_contents = CompressBlock(
|
|
1148
1154
|
uncompressed_block_data, compression_info, type,
|
|
1149
|
-
r->table_options.format_version, is_data_block /*
|
|
1155
|
+
r->table_options.format_version, is_data_block /* allow_sample */,
|
|
1150
1156
|
compressed_output, &sampled_output_fast, &sampled_output_slow);
|
|
1151
1157
|
|
|
1152
1158
|
if (sampled_output_slow.size() > 0 || sampled_output_fast.size() > 0) {
|
|
@@ -1179,35 +1185,38 @@ void BlockBasedTableBuilder::CompressAndVerifyBlock(
|
|
|
1179
1185
|
BlockContents contents;
|
|
1180
1186
|
UncompressionInfo uncompression_info(*verify_ctx, *verify_dict,
|
|
1181
1187
|
r->compression_type);
|
|
1182
|
-
Status
|
|
1188
|
+
Status uncompress_status = UncompressBlockData(
|
|
1183
1189
|
uncompression_info, block_contents->data(), block_contents->size(),
|
|
1184
1190
|
&contents, r->table_options.format_version, r->ioptions);
|
|
1185
1191
|
|
|
1186
|
-
if (
|
|
1187
|
-
bool
|
|
1188
|
-
|
|
1189
|
-
if (!compressed_ok) {
|
|
1192
|
+
if (uncompress_status.ok()) {
|
|
1193
|
+
bool data_match = contents.data.compare(uncompressed_block_data) == 0;
|
|
1194
|
+
if (!data_match) {
|
|
1190
1195
|
// The result of the compression was invalid. abort.
|
|
1191
|
-
abort_compression = true;
|
|
1192
1196
|
const char* const msg =
|
|
1193
1197
|
"Decompressed block did not match pre-compression block";
|
|
1194
1198
|
ROCKS_LOG_ERROR(r->ioptions.logger, "%s", msg);
|
|
1195
1199
|
*out_status = Status::Corruption(msg);
|
|
1200
|
+
*type = kNoCompression;
|
|
1196
1201
|
}
|
|
1197
1202
|
} else {
|
|
1198
1203
|
// Decompression reported an error. abort.
|
|
1199
1204
|
*out_status = Status::Corruption(std::string("Could not decompress: ") +
|
|
1200
|
-
|
|
1201
|
-
|
|
1205
|
+
uncompress_status.getState());
|
|
1206
|
+
*type = kNoCompression;
|
|
1202
1207
|
}
|
|
1203
1208
|
}
|
|
1209
|
+
if (timer.IsStarted()) {
|
|
1210
|
+
RecordTimeToHistogram(r->ioptions.stats, COMPRESSION_TIMES_NANOS,
|
|
1211
|
+
timer.ElapsedNanos());
|
|
1212
|
+
}
|
|
1204
1213
|
} else {
|
|
1205
|
-
//
|
|
1214
|
+
// Status is not OK, or block is too big to be compressed.
|
|
1206
1215
|
if (is_data_block) {
|
|
1207
1216
|
r->uncompressible_input_data_bytes.fetch_add(
|
|
1208
1217
|
uncompressed_block_data.size(), std::memory_order_relaxed);
|
|
1209
1218
|
}
|
|
1210
|
-
|
|
1219
|
+
*type = kNoCompression;
|
|
1211
1220
|
}
|
|
1212
1221
|
if (is_data_block) {
|
|
1213
1222
|
r->uncompressible_input_data_bytes.fetch_add(kBlockTrailerSize,
|
|
@@ -1216,26 +1225,32 @@ void BlockBasedTableBuilder::CompressAndVerifyBlock(
|
|
|
1216
1225
|
|
|
1217
1226
|
// Abort compression if the block is too big, or did not pass
|
|
1218
1227
|
// verification.
|
|
1219
|
-
if (
|
|
1220
|
-
RecordTick(r->ioptions.stats, NUMBER_BLOCK_NOT_COMPRESSED);
|
|
1221
|
-
*type = kNoCompression;
|
|
1228
|
+
if (*type == kNoCompression) {
|
|
1222
1229
|
*block_contents = uncompressed_block_data;
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
+
bool compression_attempted = !compressed_output->empty();
|
|
1231
|
+
RecordTick(r->ioptions.stats, compression_attempted
|
|
1232
|
+
? NUMBER_BLOCK_COMPRESSION_REJECTED
|
|
1233
|
+
: NUMBER_BLOCK_COMPRESSION_BYPASSED);
|
|
1234
|
+
RecordTick(r->ioptions.stats,
|
|
1235
|
+
compression_attempted ? BYTES_COMPRESSION_REJECTED
|
|
1236
|
+
: BYTES_COMPRESSION_BYPASSED,
|
|
1237
|
+
uncompressed_block_data.size());
|
|
1238
|
+
} else {
|
|
1230
1239
|
RecordTick(r->ioptions.stats, NUMBER_BLOCK_COMPRESSED);
|
|
1231
|
-
|
|
1232
|
-
|
|
1240
|
+
RecordTick(r->ioptions.stats, BYTES_COMPRESSED_FROM,
|
|
1241
|
+
uncompressed_block_data.size());
|
|
1242
|
+
RecordTick(r->ioptions.stats, BYTES_COMPRESSED_TO,
|
|
1243
|
+
compressed_output->size());
|
|
1233
1244
|
}
|
|
1234
1245
|
}
|
|
1235
1246
|
|
|
1236
1247
|
void BlockBasedTableBuilder::WriteMaybeCompressedBlock(
|
|
1237
|
-
const Slice& block_contents, CompressionType
|
|
1248
|
+
const Slice& block_contents, CompressionType comp_type, BlockHandle* handle,
|
|
1238
1249
|
BlockType block_type, const Slice* uncompressed_block_data) {
|
|
1250
|
+
// File format contains a sequence of blocks where each block has:
|
|
1251
|
+
// block_data: uint8[n]
|
|
1252
|
+
// compression_type: uint8
|
|
1253
|
+
// checksum: uint32
|
|
1239
1254
|
Rep* r = rep_;
|
|
1240
1255
|
bool is_data_block = block_type == BlockType::kData;
|
|
1241
1256
|
// Old, misleading name of this function: WriteRawBlock
|
|
@@ -1246,7 +1261,7 @@ void BlockBasedTableBuilder::WriteMaybeCompressedBlock(
|
|
|
1246
1261
|
assert(io_status().ok());
|
|
1247
1262
|
if (uncompressed_block_data == nullptr) {
|
|
1248
1263
|
uncompressed_block_data = &block_contents;
|
|
1249
|
-
assert(
|
|
1264
|
+
assert(comp_type == kNoCompression);
|
|
1250
1265
|
}
|
|
1251
1266
|
|
|
1252
1267
|
{
|
|
@@ -1258,10 +1273,10 @@ void BlockBasedTableBuilder::WriteMaybeCompressedBlock(
|
|
|
1258
1273
|
}
|
|
1259
1274
|
|
|
1260
1275
|
std::array<char, kBlockTrailerSize> trailer;
|
|
1261
|
-
trailer[0] =
|
|
1276
|
+
trailer[0] = comp_type;
|
|
1262
1277
|
uint32_t checksum = ComputeBuiltinChecksumWithLastByte(
|
|
1263
1278
|
r->table_options.checksum, block_contents.data(), block_contents.size(),
|
|
1264
|
-
/*last_byte*/
|
|
1279
|
+
/*last_byte*/ comp_type);
|
|
1265
1280
|
|
|
1266
1281
|
if (block_type == BlockType::kFilter) {
|
|
1267
1282
|
Status s = r->filter_builder->MaybePostVerifyFilter(block_contents);
|
|
@@ -1898,6 +1913,8 @@ Status BlockBasedTableBuilder::Finish() {
|
|
|
1898
1913
|
}
|
|
1899
1914
|
}
|
|
1900
1915
|
|
|
1916
|
+
r->props.tail_start_offset = r->offset;
|
|
1917
|
+
|
|
1901
1918
|
// Write meta blocks, metaindex block and footer in the following order.
|
|
1902
1919
|
// 1. [meta block: filter]
|
|
1903
1920
|
// 2. [meta block: index]
|
|
@@ -1925,6 +1942,7 @@ Status BlockBasedTableBuilder::Finish() {
|
|
|
1925
1942
|
r->SetStatus(r->CopyIOStatus());
|
|
1926
1943
|
Status ret_status = r->CopyStatus();
|
|
1927
1944
|
assert(!ret_status.ok() || io_status().ok());
|
|
1945
|
+
r->tail_size = r->offset - r->props.tail_start_offset;
|
|
1928
1946
|
return ret_status;
|
|
1929
1947
|
}
|
|
1930
1948
|
|
|
@@ -1958,6 +1976,8 @@ uint64_t BlockBasedTableBuilder::EstimatedFileSize() const {
|
|
|
1958
1976
|
}
|
|
1959
1977
|
}
|
|
1960
1978
|
|
|
1979
|
+
uint64_t BlockBasedTableBuilder::GetTailSize() const { return rep_->tail_size; }
|
|
1980
|
+
|
|
1961
1981
|
bool BlockBasedTableBuilder::NeedCompact() const {
|
|
1962
1982
|
for (const auto& collector : rep_->table_properties_collectors) {
|
|
1963
1983
|
if (collector->NeedCompact()) {
|