@nxtedition/rocksdb 7.1.20 → 7.1.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/deps/rocksdb/rocksdb/CMakeLists.txt +13 -6
- package/deps/rocksdb/rocksdb/Makefile +1 -1
- package/deps/rocksdb/rocksdb/TARGETS +2 -0
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +1 -0
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +4 -4
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +139 -161
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +92 -82
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +16 -3
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +9 -3
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +73 -30
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +25 -67
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +41 -40
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +109 -155
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +127 -149
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +75 -80
- package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +22 -172
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +272 -85
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +12 -4
- package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_garbage_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +13 -4
- package/deps/rocksdb/rocksdb/db/builder.cc +1 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +15 -1
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +25 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +10 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +22 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +14 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +38 -12
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +9 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +408 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +244 -54
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +27 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +25 -30
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +87 -26
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +61 -0
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +294 -21
- package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +28 -10
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +272 -0
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +38 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +69 -25
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +7 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +29 -12
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +0 -12
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +10 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +35 -22
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +5 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +40 -5
- package/deps/rocksdb/rocksdb/db/db_iter.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_iter_stress_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_iter_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +22 -0
- package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +72 -5
- package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +60 -21
- package/deps/rocksdb/rocksdb/db/db_test.cc +170 -1
- package/deps/rocksdb/rocksdb/db/db_test2.cc +9 -3
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +19 -0
- package/deps/rocksdb/rocksdb/db/db_test_util.h +32 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +444 -3
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +8 -8
- package/deps/rocksdb/rocksdb/db/dbformat.cc +13 -0
- package/deps/rocksdb/rocksdb/db/dbformat.h +59 -4
- package/deps/rocksdb/rocksdb/db/dbformat_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +3 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +24 -3
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/filename_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/flush_job.cc +4 -3
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +85 -43
- package/deps/rocksdb/rocksdb/db/forward_iterator.h +3 -1
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +33 -6
- package/deps/rocksdb/rocksdb/db/internal_stats.h +6 -0
- package/deps/rocksdb/rocksdb/db/listener_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/log_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/log_writer.cc +1 -1
- package/deps/rocksdb/rocksdb/db/log_writer.h +1 -1
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +158 -56
- package/deps/rocksdb/rocksdb/db/memtable.h +2 -0
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/merge_helper_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/options_file_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/prefix_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +52 -9
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +31 -2
- package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +81 -42
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +78 -12
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/repair_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +154 -27
- package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.cc +21 -4
- package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.h +4 -1
- package/deps/rocksdb/rocksdb/db/table_cache.cc +18 -6
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/version_set.cc +15 -7
- package/deps/rocksdb/rocksdb/db/version_set.h +2 -1
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/version_util.h +3 -1
- package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +28 -9
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +21 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +30 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_columns.cc +4 -0
- package/deps/rocksdb/rocksdb/db/write_batch.cc +30 -7
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +24 -13
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +5 -4
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/write_controller_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +104 -60
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +199 -108
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +39 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +8 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +3 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +19 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +26 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +247 -118
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +24 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +18 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +129 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +22 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +312 -117
- package/deps/rocksdb/rocksdb/env/env_basic_test.cc +1 -0
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +10 -2
- package/deps/rocksdb/rocksdb/env/io_posix_test.cc +1 -0
- package/deps/rocksdb/rocksdb/env/mock_env_test.cc +1 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +5 -1
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +1 -0
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +1 -0
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +49 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +44 -18
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +8 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +6 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +17 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +3 -6
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -0
- package/deps/rocksdb/rocksdb/logging/env_logger_test.cc +1 -0
- package/deps/rocksdb/rocksdb/logging/event_logger_test.cc +1 -0
- package/deps/rocksdb/rocksdb/memory/arena.cc +23 -88
- package/deps/rocksdb/rocksdb/memory/arena.h +25 -31
- package/deps/rocksdb/rocksdb/memory/arena_test.cc +61 -0
- package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +1 -0
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +1 -0
- package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -0
- package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +1 -0
- package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +1 -0
- package/deps/rocksdb/rocksdb/monitoring/iostats_context_test.cc +1 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +19 -0
- package/deps/rocksdb/rocksdb/options/cf_options.h +8 -0
- package/deps/rocksdb/rocksdb/options/configurable_test.cc +1 -0
- package/deps/rocksdb/rocksdb/options/options.cc +7 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +6 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +6 -0
- package/deps/rocksdb/rocksdb/options/options_test.cc +63 -40
- package/deps/rocksdb/rocksdb/port/mmap.cc +98 -0
- package/deps/rocksdb/rocksdb/port/mmap.h +70 -0
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +7 -0
- package/deps/rocksdb/rocksdb/port/stack_trace.h +4 -1
- package/deps/rocksdb/rocksdb/port/win/port_win.h +2 -7
- package/deps/rocksdb/rocksdb/src.mk +1 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +7 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +3 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +1 -0
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +1 -0
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +1 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -0
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +1 -0
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +1 -0
- package/deps/rocksdb/rocksdb/table/get_context.cc +19 -1
- package/deps/rocksdb/rocksdb/table/get_context.h +9 -0
- package/deps/rocksdb/rocksdb/table/merger_test.cc +1 -0
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +10 -11
- package/deps/rocksdb/rocksdb/table/mock_table.cc +37 -19
- package/deps/rocksdb/rocksdb/table/mock_table.h +5 -1
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +6 -0
- package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +33 -0
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +45 -6
- package/deps/rocksdb/rocksdb/test_util/testharness.h +2 -0
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +5 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +36 -0
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +1 -0
- package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer_test.cc +1 -0
- package/deps/rocksdb/rocksdb/trace_replay/io_tracer_test.cc +1 -0
- package/deps/rocksdb/rocksdb/util/autovector_test.cc +1 -0
- package/deps/rocksdb/rocksdb/util/bloom_test.cc +1 -0
- package/deps/rocksdb/rocksdb/util/coding_test.cc +1 -0
- package/deps/rocksdb/rocksdb/util/crc32c_test.cc +1 -0
- package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +1 -0
- package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +1 -0
- package/deps/rocksdb/rocksdb/util/filelock_test.cc +1 -0
- package/deps/rocksdb/rocksdb/util/gflags_compat.h +12 -7
- package/deps/rocksdb/rocksdb/util/hash_test.cc +1 -0
- package/deps/rocksdb/rocksdb/util/heap_test.cc +4 -2
- package/deps/rocksdb/rocksdb/util/random_test.cc +1 -0
- package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +1 -0
- package/deps/rocksdb/rocksdb/util/repeatable_thread_test.cc +1 -0
- package/deps/rocksdb/rocksdb/util/ribbon_test.cc +1 -0
- package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +1 -0
- package/deps/rocksdb/rocksdb/util/thread_list_test.cc +1 -0
- package/deps/rocksdb/rocksdb/util/thread_local_test.cc +1 -0
- package/deps/rocksdb/rocksdb/util/timer_test.cc +1 -0
- package/deps/rocksdb/rocksdb/util/work_queue_test.cc +4 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +13 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +9 -3
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_format_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_row_merge_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_serialize_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/env_mirror_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/env_timed_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +8 -0
- package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/object_registry_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/util_merge_operators_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +7 -0
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +20 -0
- package/index.js +12 -4
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
|
@@ -76,14 +76,131 @@ IOStatus CompactionOutputs::WriterSyncClose(const Status& input_status,
|
|
|
76
76
|
return io_s;
|
|
77
77
|
}
|
|
78
78
|
|
|
79
|
+
size_t CompactionOutputs::UpdateGrandparentBoundaryInfo(
|
|
80
|
+
const Slice& internal_key) {
|
|
81
|
+
size_t curr_key_boundary_switched_num = 0;
|
|
82
|
+
const std::vector<FileMetaData*>& grandparents = compaction_->grandparents();
|
|
83
|
+
|
|
84
|
+
if (grandparents.empty()) {
|
|
85
|
+
return curr_key_boundary_switched_num;
|
|
86
|
+
}
|
|
87
|
+
assert(!internal_key.empty());
|
|
88
|
+
InternalKey ikey;
|
|
89
|
+
ikey.DecodeFrom(internal_key);
|
|
90
|
+
assert(ikey.Valid());
|
|
91
|
+
|
|
92
|
+
const Comparator* ucmp = compaction_->column_family_data()->user_comparator();
|
|
93
|
+
|
|
94
|
+
// Move the grandparent_index_ to the file containing the current user_key.
|
|
95
|
+
// If there are multiple files containing the same user_key, make sure the
|
|
96
|
+
// index points to the last file containing the key.
|
|
97
|
+
while (grandparent_index_ < grandparents.size()) {
|
|
98
|
+
if (being_grandparent_gap_) {
|
|
99
|
+
if (sstableKeyCompare(ucmp, ikey,
|
|
100
|
+
grandparents[grandparent_index_]->smallest) < 0) {
|
|
101
|
+
break;
|
|
102
|
+
}
|
|
103
|
+
if (seen_key_) {
|
|
104
|
+
curr_key_boundary_switched_num++;
|
|
105
|
+
grandparent_overlapped_bytes_ +=
|
|
106
|
+
grandparents[grandparent_index_]->fd.GetFileSize();
|
|
107
|
+
grandparent_boundary_switched_num_++;
|
|
108
|
+
}
|
|
109
|
+
being_grandparent_gap_ = false;
|
|
110
|
+
} else {
|
|
111
|
+
int cmp_result = sstableKeyCompare(
|
|
112
|
+
ucmp, ikey, grandparents[grandparent_index_]->largest);
|
|
113
|
+
// If it's same key, make sure grandparent_index_ is pointing to the last
|
|
114
|
+
// one.
|
|
115
|
+
if (cmp_result < 0 ||
|
|
116
|
+
(cmp_result == 0 &&
|
|
117
|
+
(grandparent_index_ == grandparents.size() - 1 ||
|
|
118
|
+
sstableKeyCompare(ucmp, ikey,
|
|
119
|
+
grandparents[grandparent_index_ + 1]->smallest) <
|
|
120
|
+
0))) {
|
|
121
|
+
break;
|
|
122
|
+
}
|
|
123
|
+
if (seen_key_) {
|
|
124
|
+
curr_key_boundary_switched_num++;
|
|
125
|
+
grandparent_boundary_switched_num_++;
|
|
126
|
+
}
|
|
127
|
+
being_grandparent_gap_ = true;
|
|
128
|
+
grandparent_index_++;
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
// If the first key is in the middle of a grandparent file, adding it to the
|
|
133
|
+
// overlap
|
|
134
|
+
if (!seen_key_ && !being_grandparent_gap_) {
|
|
135
|
+
assert(grandparent_overlapped_bytes_ == 0);
|
|
136
|
+
grandparent_overlapped_bytes_ =
|
|
137
|
+
GetCurrentKeyGrandparentOverlappedBytes(internal_key);
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
seen_key_ = true;
|
|
141
|
+
return curr_key_boundary_switched_num;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
uint64_t CompactionOutputs::GetCurrentKeyGrandparentOverlappedBytes(
|
|
145
|
+
const Slice& internal_key) const {
|
|
146
|
+
// no overlap with any grandparent file
|
|
147
|
+
if (being_grandparent_gap_) {
|
|
148
|
+
return 0;
|
|
149
|
+
}
|
|
150
|
+
uint64_t overlapped_bytes = 0;
|
|
151
|
+
|
|
152
|
+
const std::vector<FileMetaData*>& grandparents = compaction_->grandparents();
|
|
153
|
+
const Comparator* ucmp = compaction_->column_family_data()->user_comparator();
|
|
154
|
+
InternalKey ikey;
|
|
155
|
+
ikey.DecodeFrom(internal_key);
|
|
156
|
+
#ifndef NDEBUG
|
|
157
|
+
// make sure the grandparent_index_ is pointing to the last files containing
|
|
158
|
+
// the current key.
|
|
159
|
+
int cmp_result =
|
|
160
|
+
sstableKeyCompare(ucmp, ikey, grandparents[grandparent_index_]->largest);
|
|
161
|
+
assert(
|
|
162
|
+
cmp_result < 0 ||
|
|
163
|
+
(cmp_result == 0 &&
|
|
164
|
+
(grandparent_index_ == grandparents.size() - 1 ||
|
|
165
|
+
sstableKeyCompare(
|
|
166
|
+
ucmp, ikey, grandparents[grandparent_index_ + 1]->smallest) < 0)));
|
|
167
|
+
assert(sstableKeyCompare(ucmp, ikey,
|
|
168
|
+
grandparents[grandparent_index_]->smallest) >= 0);
|
|
169
|
+
#endif
|
|
170
|
+
overlapped_bytes += grandparents[grandparent_index_]->fd.GetFileSize();
|
|
171
|
+
|
|
172
|
+
// go backwards to find all overlapped files, one key can overlap multiple
|
|
173
|
+
// files. In the following example, if the current output key is `c`, and one
|
|
174
|
+
// compaction file was cut before `c`, current `c` can overlap with 3 files:
|
|
175
|
+
// [a b] [c...
|
|
176
|
+
// [b, b] [c, c] [c, c] [c, d]
|
|
177
|
+
for (int64_t i = static_cast<int64_t>(grandparent_index_) - 1;
|
|
178
|
+
i >= 0 && sstableKeyCompare(ucmp, ikey, grandparents[i]->largest) == 0;
|
|
179
|
+
i--) {
|
|
180
|
+
overlapped_bytes += grandparents[i]->fd.GetFileSize();
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
return overlapped_bytes;
|
|
184
|
+
}
|
|
185
|
+
|
|
79
186
|
bool CompactionOutputs::ShouldStopBefore(const CompactionIterator& c_iter) {
|
|
80
187
|
assert(c_iter.Valid());
|
|
81
188
|
|
|
189
|
+
// always update grandparent information like overlapped file number, size
|
|
190
|
+
// etc.
|
|
191
|
+
const Slice& internal_key = c_iter.key();
|
|
192
|
+
const uint64_t previous_overlapped_bytes = grandparent_overlapped_bytes_;
|
|
193
|
+
size_t num_grandparent_boundaries_crossed =
|
|
194
|
+
UpdateGrandparentBoundaryInfo(internal_key);
|
|
195
|
+
|
|
196
|
+
if (!HasBuilder()) {
|
|
197
|
+
return false;
|
|
198
|
+
}
|
|
199
|
+
|
|
82
200
|
// If there's user defined partitioner, check that first
|
|
83
|
-
if (
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
current_output_file_size_)) == kRequired) {
|
|
201
|
+
if (partitioner_ && partitioner_->ShouldPartition(PartitionerRequest(
|
|
202
|
+
last_key_for_partitioner_, c_iter.user_key(),
|
|
203
|
+
current_output_file_size_)) == kRequired) {
|
|
87
204
|
return true;
|
|
88
205
|
}
|
|
89
206
|
|
|
@@ -92,12 +209,11 @@ bool CompactionOutputs::ShouldStopBefore(const CompactionIterator& c_iter) {
|
|
|
92
209
|
return false;
|
|
93
210
|
}
|
|
94
211
|
|
|
95
|
-
// reach the
|
|
212
|
+
// reach the max file size
|
|
96
213
|
if (current_output_file_size_ >= compaction_->max_output_file_size()) {
|
|
97
214
|
return true;
|
|
98
215
|
}
|
|
99
216
|
|
|
100
|
-
const Slice& internal_key = c_iter.key();
|
|
101
217
|
const InternalKeyComparator* icmp =
|
|
102
218
|
&compaction_->column_family_data()->internal_comparator();
|
|
103
219
|
|
|
@@ -111,32 +227,67 @@ bool CompactionOutputs::ShouldStopBefore(const CompactionIterator& c_iter) {
|
|
|
111
227
|
}
|
|
112
228
|
}
|
|
113
229
|
|
|
114
|
-
//
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
grandparant_file_switched = true;
|
|
230
|
+
// only check if the current key is going to cross the grandparents file
|
|
231
|
+
// boundary (either the file beginning or ending).
|
|
232
|
+
if (num_grandparent_boundaries_crossed > 0) {
|
|
233
|
+
// Cut the file before the current key if the size of the current output
|
|
234
|
+
// file + its overlapped grandparent files is bigger than
|
|
235
|
+
// max_compaction_bytes. Which is to prevent future bigger than
|
|
236
|
+
// max_compaction_bytes compaction from the current output level.
|
|
237
|
+
if (grandparent_overlapped_bytes_ + current_output_file_size_ >
|
|
238
|
+
compaction_->max_compaction_bytes()) {
|
|
239
|
+
return true;
|
|
125
240
|
}
|
|
126
|
-
assert(grandparent_index_ + 1 >= grandparents.size() ||
|
|
127
|
-
icmp->Compare(
|
|
128
|
-
grandparents[grandparent_index_]->largest.Encode(),
|
|
129
|
-
grandparents[grandparent_index_ + 1]->smallest.Encode()) <= 0);
|
|
130
|
-
grandparent_index_++;
|
|
131
|
-
}
|
|
132
|
-
seen_key_ = true;
|
|
133
241
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
//
|
|
138
|
-
|
|
139
|
-
|
|
242
|
+
// Cut the file if including the key is going to add a skippable file on
|
|
243
|
+
// the grandparent level AND its size is reasonably big (1/8 of target file
|
|
244
|
+
// size). For example, if it's compacting the files L0 + L1:
|
|
245
|
+
// L0: [1, 21]
|
|
246
|
+
// L1: [3, 23]
|
|
247
|
+
// L2: [2, 4] [11, 15] [22, 24]
|
|
248
|
+
// Without this break, it will output as:
|
|
249
|
+
// L1: [1,3, 21,23]
|
|
250
|
+
// With this break, it will output as (assuming [11, 15] at L2 is bigger
|
|
251
|
+
// than 1/8 of target size):
|
|
252
|
+
// L1: [1,3] [21,23]
|
|
253
|
+
// Then for the future compactions, [11,15] won't be included.
|
|
254
|
+
// For random datasets (either evenly distributed or skewed), it rarely
|
|
255
|
+
// triggers this condition, but if the user is adding 2 different datasets
|
|
256
|
+
// without any overlap, it may likely happen.
|
|
257
|
+
// More details, check PR #1963
|
|
258
|
+
const size_t num_skippable_boundaries_crossed =
|
|
259
|
+
being_grandparent_gap_ ? 2 : 3;
|
|
260
|
+
if (compaction_->immutable_options()->compaction_style ==
|
|
261
|
+
kCompactionStyleLevel &&
|
|
262
|
+
compaction_->immutable_options()->level_compaction_dynamic_file_size &&
|
|
263
|
+
num_grandparent_boundaries_crossed >=
|
|
264
|
+
num_skippable_boundaries_crossed &&
|
|
265
|
+
grandparent_overlapped_bytes_ - previous_overlapped_bytes >
|
|
266
|
+
compaction_->target_output_file_size() / 8) {
|
|
267
|
+
return true;
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
// Pre-cut the output file if it's reaching a certain size AND it's at the
|
|
271
|
+
// boundary of a grandparent file. It can reduce the future compaction size,
|
|
272
|
+
// the cost is having smaller files.
|
|
273
|
+
// The pre-cut size threshold is based on how many grandparent boundaries
|
|
274
|
+
// it has seen before. Basically, if it has seen no boundary at all, then it
|
|
275
|
+
// will pre-cut at 50% target file size. Every boundary it has seen
|
|
276
|
+
// increases the threshold by 5%, max at 90%, which it will always cut.
|
|
277
|
+
// The idea is based on if it has seen more boundaries before, it will more
|
|
278
|
+
// likely to see another boundary (file cutting opportunity) before the
|
|
279
|
+
// target file size. The test shows it can generate larger files than a
|
|
280
|
+
// static threshold like 75% and has a similar write amplification
|
|
281
|
+
// improvement.
|
|
282
|
+
if (compaction_->immutable_options()->compaction_style ==
|
|
283
|
+
kCompactionStyleLevel &&
|
|
284
|
+
compaction_->immutable_options()->level_compaction_dynamic_file_size &&
|
|
285
|
+
current_output_file_size_ >=
|
|
286
|
+
((compaction_->target_output_file_size() + 99) / 100) *
|
|
287
|
+
(50 + std::min(grandparent_boundary_switched_num_ * 5,
|
|
288
|
+
size_t{40}))) {
|
|
289
|
+
return true;
|
|
290
|
+
}
|
|
140
291
|
}
|
|
141
292
|
|
|
142
293
|
// check ttl file boundaries if there's any
|
|
@@ -189,6 +340,10 @@ Status CompactionOutputs::AddToOutput(
|
|
|
189
340
|
if (!s.ok()) {
|
|
190
341
|
return s;
|
|
191
342
|
}
|
|
343
|
+
// reset grandparent information
|
|
344
|
+
grandparent_boundary_switched_num_ = 0;
|
|
345
|
+
grandparent_overlapped_bytes_ =
|
|
346
|
+
GetCurrentKeyGrandparentOverlappedBytes(key);
|
|
192
347
|
}
|
|
193
348
|
|
|
194
349
|
// Open output file if necessary
|
|
@@ -199,10 +354,9 @@ Status CompactionOutputs::AddToOutput(
|
|
|
199
354
|
}
|
|
200
355
|
}
|
|
201
356
|
|
|
202
|
-
Output& curr = current_output();
|
|
203
357
|
assert(builder_ != nullptr);
|
|
204
358
|
const Slice& value = c_iter.value();
|
|
205
|
-
s =
|
|
359
|
+
s = current_output().validator.Add(key, value);
|
|
206
360
|
if (!s.ok()) {
|
|
207
361
|
return s;
|
|
208
362
|
}
|
|
@@ -232,10 +386,10 @@ Status CompactionOutputs::AddToOutput(
|
|
|
232
386
|
}
|
|
233
387
|
|
|
234
388
|
Status CompactionOutputs::AddRangeDels(
|
|
235
|
-
const Slice*
|
|
389
|
+
const Slice* comp_start_user_key, const Slice* comp_end_user_key,
|
|
236
390
|
CompactionIterationStats& range_del_out_stats, bool bottommost_level,
|
|
237
391
|
const InternalKeyComparator& icmp, SequenceNumber earliest_snapshot,
|
|
238
|
-
const Slice& next_table_min_key) {
|
|
392
|
+
const Slice& next_table_min_key, const std::string& full_history_ts_low) {
|
|
239
393
|
assert(HasRangeDel());
|
|
240
394
|
FileMetaData& meta = current_output().meta;
|
|
241
395
|
const Comparator* ucmp = icmp.user_comparator();
|
|
@@ -249,7 +403,7 @@ Status CompactionOutputs::AddRangeDels(
|
|
|
249
403
|
if (output_size == 1) {
|
|
250
404
|
// For the first output table, include range tombstones before the min
|
|
251
405
|
// key but after the subcompaction boundary.
|
|
252
|
-
lower_bound =
|
|
406
|
+
lower_bound = comp_start_user_key;
|
|
253
407
|
lower_bound_from_sub_compact = true;
|
|
254
408
|
} else if (meta.smallest.size() > 0) {
|
|
255
409
|
// For subsequent output tables, only include range tombstones from min
|
|
@@ -269,21 +423,22 @@ Status CompactionOutputs::AddRangeDels(
|
|
|
269
423
|
// use the smaller key as the upper bound of the output file, to ensure
|
|
270
424
|
// that there is no overlapping between different output files.
|
|
271
425
|
upper_bound_guard = ExtractUserKey(next_table_min_key);
|
|
272
|
-
if (
|
|
273
|
-
ucmp->
|
|
274
|
-
|
|
426
|
+
if (comp_end_user_key != nullptr &&
|
|
427
|
+
ucmp->CompareWithoutTimestamp(upper_bound_guard, *comp_end_user_key) >=
|
|
428
|
+
0) {
|
|
429
|
+
upper_bound = comp_end_user_key;
|
|
275
430
|
} else {
|
|
276
431
|
upper_bound = &upper_bound_guard;
|
|
277
432
|
}
|
|
278
433
|
} else {
|
|
279
434
|
// This is the last file in the subcompaction, so extend until the
|
|
280
435
|
// subcompaction ends.
|
|
281
|
-
upper_bound =
|
|
436
|
+
upper_bound = comp_end_user_key;
|
|
282
437
|
}
|
|
283
438
|
bool has_overlapping_endpoints;
|
|
284
439
|
if (upper_bound != nullptr && meta.largest.size() > 0) {
|
|
285
|
-
has_overlapping_endpoints =
|
|
286
|
-
|
|
440
|
+
has_overlapping_endpoints = ucmp->CompareWithoutTimestamp(
|
|
441
|
+
meta.largest.user_key(), *upper_bound) == 0;
|
|
287
442
|
} else {
|
|
288
443
|
has_overlapping_endpoints = false;
|
|
289
444
|
}
|
|
@@ -292,8 +447,8 @@ Status CompactionOutputs::AddRangeDels(
|
|
|
292
447
|
// bound. If the end of subcompaction is null or the upper bound is null,
|
|
293
448
|
// it means that this file is the last file in the compaction. So there
|
|
294
449
|
// will be no overlapping between this file and others.
|
|
295
|
-
assert(
|
|
296
|
-
ucmp->
|
|
450
|
+
assert(comp_end_user_key == nullptr || upper_bound == nullptr ||
|
|
451
|
+
ucmp->CompareWithoutTimestamp(*upper_bound, *comp_end_user_key) <= 0);
|
|
297
452
|
auto it = range_del_agg_->NewIterator(lower_bound, upper_bound,
|
|
298
453
|
has_overlapping_endpoints);
|
|
299
454
|
// Position the range tombstone output iterator. There may be tombstone
|
|
@@ -307,7 +462,8 @@ Status CompactionOutputs::AddRangeDels(
|
|
|
307
462
|
for (; it->Valid(); it->Next()) {
|
|
308
463
|
auto tombstone = it->Tombstone();
|
|
309
464
|
if (upper_bound != nullptr) {
|
|
310
|
-
int cmp =
|
|
465
|
+
int cmp =
|
|
466
|
+
ucmp->CompareWithoutTimestamp(*upper_bound, tombstone.start_key_);
|
|
311
467
|
if ((has_overlapping_endpoints && cmp < 0) ||
|
|
312
468
|
(!has_overlapping_endpoints && cmp <= 0)) {
|
|
313
469
|
// Tombstones starting after upper_bound only need to be included in
|
|
@@ -320,7 +476,17 @@ Status CompactionOutputs::AddRangeDels(
|
|
|
320
476
|
}
|
|
321
477
|
}
|
|
322
478
|
|
|
323
|
-
|
|
479
|
+
const size_t ts_sz = ucmp->timestamp_size();
|
|
480
|
+
// Garbage collection for range tombstones.
|
|
481
|
+
// If user-defined timestamp is enabled, range tombstones are dropped if
|
|
482
|
+
// they are at bottommost_level, below full_history_ts_low and not visible
|
|
483
|
+
// in any snapshot. trim_ts_ is passed to the constructor for
|
|
484
|
+
// range_del_agg_, and range_del_agg_ internally drops tombstones above
|
|
485
|
+
// trim_ts_.
|
|
486
|
+
if (bottommost_level && tombstone.seq_ <= earliest_snapshot &&
|
|
487
|
+
(ts_sz == 0 ||
|
|
488
|
+
(!full_history_ts_low.empty() &&
|
|
489
|
+
ucmp->CompareTimestamp(tombstone.ts_, full_history_ts_low) < 0))) {
|
|
324
490
|
// TODO(andrewkr): tombstones that span multiple output files are
|
|
325
491
|
// counted for each compaction output file, so lots of double
|
|
326
492
|
// counting.
|
|
@@ -331,12 +497,13 @@ Status CompactionOutputs::AddRangeDels(
|
|
|
331
497
|
|
|
332
498
|
auto kv = tombstone.Serialize();
|
|
333
499
|
assert(lower_bound == nullptr ||
|
|
334
|
-
ucmp->
|
|
500
|
+
ucmp->CompareWithoutTimestamp(*lower_bound, kv.second) < 0);
|
|
335
501
|
// Range tombstone is not supported by output validator yet.
|
|
336
502
|
builder_->Add(kv.first.Encode(), kv.second);
|
|
337
503
|
InternalKey smallest_candidate = std::move(kv.first);
|
|
338
504
|
if (lower_bound != nullptr &&
|
|
339
|
-
ucmp->
|
|
505
|
+
ucmp->CompareWithoutTimestamp(smallest_candidate.user_key(),
|
|
506
|
+
*lower_bound) <= 0) {
|
|
340
507
|
// Pretend the smallest key has the same user key as lower_bound
|
|
341
508
|
// (the max key in the previous table or subcompaction) in order for
|
|
342
509
|
// files to appear key-space partitioned.
|
|
@@ -356,13 +523,23 @@ Status CompactionOutputs::AddRangeDels(
|
|
|
356
523
|
// choose lowest seqnum so this file's smallest internal key comes
|
|
357
524
|
// after the previous file's largest. The fake seqnum is OK because
|
|
358
525
|
// the read path's file-picking code only considers user key.
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
526
|
+
if (lower_bound_from_sub_compact) {
|
|
527
|
+
if (ts_sz) {
|
|
528
|
+
assert(tombstone.ts_.size() == ts_sz);
|
|
529
|
+
smallest_candidate = InternalKey(*lower_bound, tombstone.seq_,
|
|
530
|
+
kTypeRangeDeletion, tombstone.ts_);
|
|
531
|
+
} else {
|
|
532
|
+
smallest_candidate =
|
|
533
|
+
InternalKey(*lower_bound, tombstone.seq_, kTypeRangeDeletion);
|
|
534
|
+
}
|
|
535
|
+
} else {
|
|
536
|
+
smallest_candidate = InternalKey(*lower_bound, 0, kTypeRangeDeletion);
|
|
537
|
+
}
|
|
362
538
|
}
|
|
363
539
|
InternalKey largest_candidate = tombstone.SerializeEndKey();
|
|
364
540
|
if (upper_bound != nullptr &&
|
|
365
|
-
ucmp->
|
|
541
|
+
ucmp->CompareWithoutTimestamp(*upper_bound,
|
|
542
|
+
largest_candidate.user_key()) <= 0) {
|
|
366
543
|
// Pretend the largest key has the same user key as upper_bound (the
|
|
367
544
|
// min key in the following table or subcompaction) in order for files
|
|
368
545
|
// to appear key-space partitioned.
|
|
@@ -376,9 +553,22 @@ Status CompactionOutputs::AddRangeDels(
|
|
|
376
553
|
// kMaxSequenceNumber), but with kTypeDeletion (0x7) instead of
|
|
377
554
|
// kTypeRangeDeletion (0xF), so the range tombstone comes before the
|
|
378
555
|
// Seek() key in InternalKey's ordering. So Seek() will look in the
|
|
379
|
-
// next file for the user key
|
|
380
|
-
|
|
381
|
-
|
|
556
|
+
// next file for the user key
|
|
557
|
+
if (ts_sz) {
|
|
558
|
+
static constexpr char kTsMax[] = "\xff\xff\xff\xff\xff\xff\xff\xff\xff";
|
|
559
|
+
if (ts_sz <= strlen(kTsMax)) {
|
|
560
|
+
largest_candidate =
|
|
561
|
+
InternalKey(*upper_bound, kMaxSequenceNumber, kTypeRangeDeletion,
|
|
562
|
+
Slice(kTsMax, ts_sz));
|
|
563
|
+
} else {
|
|
564
|
+
largest_candidate =
|
|
565
|
+
InternalKey(*upper_bound, kMaxSequenceNumber, kTypeRangeDeletion,
|
|
566
|
+
std::string(ts_sz, '\xff'));
|
|
567
|
+
}
|
|
568
|
+
} else {
|
|
569
|
+
largest_candidate =
|
|
570
|
+
InternalKey(*upper_bound, kMaxSequenceNumber, kTypeRangeDeletion);
|
|
571
|
+
}
|
|
382
572
|
}
|
|
383
573
|
#ifndef NDEBUG
|
|
384
574
|
SequenceNumber smallest_ikey_seqnum = kMaxSequenceNumber;
|
|
@@ -168,11 +168,16 @@ class CompactionOutputs {
|
|
|
168
168
|
}
|
|
169
169
|
|
|
170
170
|
// Add range-dels from the aggregator to the current output file
|
|
171
|
-
|
|
171
|
+
// @param comp_start_user_key and comp_end_user_key include timestamp if
|
|
172
|
+
// user-defined timestamp is enabled.
|
|
173
|
+
// @param full_history_ts_low used for range tombstone garbage collection.
|
|
174
|
+
Status AddRangeDels(const Slice* comp_start_user_key,
|
|
175
|
+
const Slice* comp_end_user_key,
|
|
172
176
|
CompactionIterationStats& range_del_out_stats,
|
|
173
177
|
bool bottommost_level, const InternalKeyComparator& icmp,
|
|
174
178
|
SequenceNumber earliest_snapshot,
|
|
175
|
-
const Slice& next_table_min_key
|
|
179
|
+
const Slice& next_table_min_key,
|
|
180
|
+
const std::string& full_history_ts_low);
|
|
176
181
|
|
|
177
182
|
// if the outputs have range delete, range delete is also data
|
|
178
183
|
bool HasRangeDel() const {
|
|
@@ -216,9 +221,16 @@ class CompactionOutputs {
|
|
|
216
221
|
}
|
|
217
222
|
}
|
|
218
223
|
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
224
|
+
// update tracked grandparents information like grandparent index, if it's
|
|
225
|
+
// in the gap between 2 grandparent files, accumulated grandparent files size
|
|
226
|
+
// etc.
|
|
227
|
+
// It returns how many boundaries it crosses by including current key.
|
|
228
|
+
size_t UpdateGrandparentBoundaryInfo(const Slice& internal_key);
|
|
229
|
+
|
|
230
|
+
// helper function to get the overlapped grandparent files size, it's only
|
|
231
|
+
// used for calculating the first key's overlap.
|
|
232
|
+
uint64_t GetCurrentKeyGrandparentOverlappedBytes(
|
|
233
|
+
const Slice& internal_key) const;
|
|
222
234
|
|
|
223
235
|
// Add current key from compaction_iterator to the output file. If needed
|
|
224
236
|
// close and open new compaction output with the functions provided.
|
|
@@ -311,12 +323,21 @@ class CompactionOutputs {
|
|
|
311
323
|
// An index that used to speed up ShouldStopBefore().
|
|
312
324
|
size_t grandparent_index_ = 0;
|
|
313
325
|
|
|
326
|
+
// if the output key is being grandparent files gap, so:
|
|
327
|
+
// key > grandparents[grandparent_index_ - 1].largest &&
|
|
328
|
+
// key < grandparents[grandparent_index_].smallest
|
|
329
|
+
bool being_grandparent_gap_ = true;
|
|
330
|
+
|
|
314
331
|
// The number of bytes overlapping between the current output and
|
|
315
332
|
// grandparent files used in ShouldStopBefore().
|
|
316
|
-
uint64_t
|
|
333
|
+
uint64_t grandparent_overlapped_bytes_ = 0;
|
|
317
334
|
|
|
318
335
|
// A flag determines whether the key has been seen in ShouldStopBefore()
|
|
319
336
|
bool seen_key_ = false;
|
|
337
|
+
|
|
338
|
+
// for the current output file, how many file boundaries has it crossed,
|
|
339
|
+
// basically number of files overlapped * 2
|
|
340
|
+
size_t grandparent_boundary_switched_num_ = 0;
|
|
320
341
|
};
|
|
321
342
|
|
|
322
343
|
// helper struct to concatenate the last level and penultimate level outputs
|
|
@@ -27,16 +27,6 @@
|
|
|
27
27
|
|
|
28
28
|
namespace ROCKSDB_NAMESPACE {
|
|
29
29
|
|
|
30
|
-
namespace {
|
|
31
|
-
uint64_t TotalCompensatedFileSize(const std::vector<FileMetaData*>& files) {
|
|
32
|
-
uint64_t sum = 0;
|
|
33
|
-
for (size_t i = 0; i < files.size() && files[i]; i++) {
|
|
34
|
-
sum += files[i]->compensated_file_size;
|
|
35
|
-
}
|
|
36
|
-
return sum;
|
|
37
|
-
}
|
|
38
|
-
} // anonymous namespace
|
|
39
|
-
|
|
40
30
|
bool FindIntraL0Compaction(const std::vector<FileMetaData*>& level_files,
|
|
41
31
|
size_t min_files_to_compact,
|
|
42
32
|
uint64_t max_compact_bytes_per_del_file,
|
|
@@ -63,8 +53,6 @@ bool FindIntraL0Compaction(const std::vector<FileMetaData*>& level_files,
|
|
|
63
53
|
return false;
|
|
64
54
|
}
|
|
65
55
|
size_t compact_bytes = static_cast<size_t>(level_files[start]->fd.file_size);
|
|
66
|
-
uint64_t compensated_compact_bytes =
|
|
67
|
-
level_files[start]->compensated_file_size;
|
|
68
56
|
size_t compact_bytes_per_del_file = std::numeric_limits<size_t>::max();
|
|
69
57
|
// Compaction range will be [start, limit).
|
|
70
58
|
size_t limit;
|
|
@@ -73,11 +61,10 @@ bool FindIntraL0Compaction(const std::vector<FileMetaData*>& level_files,
|
|
|
73
61
|
size_t new_compact_bytes_per_del_file = 0;
|
|
74
62
|
for (limit = start + 1; limit < level_files.size(); ++limit) {
|
|
75
63
|
compact_bytes += static_cast<size_t>(level_files[limit]->fd.file_size);
|
|
76
|
-
compensated_compact_bytes += level_files[limit]->compensated_file_size;
|
|
77
64
|
new_compact_bytes_per_del_file = compact_bytes / (limit - start);
|
|
78
65
|
if (level_files[limit]->being_compacted ||
|
|
79
66
|
new_compact_bytes_per_del_file > compact_bytes_per_del_file ||
|
|
80
|
-
|
|
67
|
+
compact_bytes > max_compaction_bytes) {
|
|
81
68
|
break;
|
|
82
69
|
}
|
|
83
70
|
compact_bytes_per_del_file = new_compact_bytes_per_del_file;
|
|
@@ -327,12 +314,19 @@ bool CompactionPicker::FilesRangeOverlapWithCompaction(
|
|
|
327
314
|
int penultimate_level =
|
|
328
315
|
Compaction::EvaluatePenultimateLevel(ioptions_, start_level, level);
|
|
329
316
|
if (penultimate_level != Compaction::kInvalidLevel) {
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
317
|
+
if (ioptions_.compaction_style == kCompactionStyleUniversal) {
|
|
318
|
+
if (RangeOverlapWithCompaction(smallest.user_key(), largest.user_key(),
|
|
319
|
+
penultimate_level)) {
|
|
320
|
+
return true;
|
|
321
|
+
}
|
|
322
|
+
} else {
|
|
323
|
+
InternalKey penultimate_smallest, penultimate_largest;
|
|
324
|
+
GetRange(inputs, &penultimate_smallest, &penultimate_largest, level);
|
|
325
|
+
if (RangeOverlapWithCompaction(penultimate_smallest.user_key(),
|
|
326
|
+
penultimate_largest.user_key(),
|
|
327
|
+
penultimate_level)) {
|
|
328
|
+
return true;
|
|
329
|
+
}
|
|
336
330
|
}
|
|
337
331
|
}
|
|
338
332
|
|
|
@@ -507,8 +501,8 @@ bool CompactionPicker::SetupOtherInputs(
|
|
|
507
501
|
if (!output_level_inputs->empty()) {
|
|
508
502
|
const uint64_t limit = mutable_cf_options.max_compaction_bytes;
|
|
509
503
|
const uint64_t output_level_inputs_size =
|
|
510
|
-
|
|
511
|
-
const uint64_t inputs_size =
|
|
504
|
+
TotalFileSize(output_level_inputs->files);
|
|
505
|
+
const uint64_t inputs_size = TotalFileSize(inputs->files);
|
|
512
506
|
bool expand_inputs = false;
|
|
513
507
|
|
|
514
508
|
CompactionInputFiles expanded_inputs;
|
|
@@ -527,13 +521,13 @@ bool CompactionPicker::SetupOtherInputs(
|
|
|
527
521
|
&expanded_inputs.files, base_index,
|
|
528
522
|
nullptr);
|
|
529
523
|
}
|
|
530
|
-
uint64_t expanded_inputs_size =
|
|
531
|
-
TotalCompensatedFileSize(expanded_inputs.files);
|
|
524
|
+
uint64_t expanded_inputs_size = TotalFileSize(expanded_inputs.files);
|
|
532
525
|
if (!ExpandInputsToCleanCut(cf_name, vstorage, &expanded_inputs)) {
|
|
533
526
|
try_overlapping_inputs = false;
|
|
534
527
|
}
|
|
535
528
|
if (try_overlapping_inputs && expanded_inputs.size() > inputs->size() &&
|
|
536
|
-
|
|
529
|
+
(mutable_cf_options.ignore_max_compaction_bytes_for_input ||
|
|
530
|
+
output_level_inputs_size + expanded_inputs_size < limit) &&
|
|
537
531
|
!AreFilesInCompaction(expanded_inputs.files)) {
|
|
538
532
|
InternalKey new_start, new_limit;
|
|
539
533
|
GetRange(expanded_inputs, &new_start, &new_limit);
|
|
@@ -554,9 +548,10 @@ bool CompactionPicker::SetupOtherInputs(
|
|
|
554
548
|
vstorage->GetCleanInputsWithinInterval(input_level, &all_start,
|
|
555
549
|
&all_limit, &expanded_inputs.files,
|
|
556
550
|
base_index, nullptr);
|
|
557
|
-
expanded_inputs_size =
|
|
551
|
+
expanded_inputs_size = TotalFileSize(expanded_inputs.files);
|
|
558
552
|
if (expanded_inputs.size() > inputs->size() &&
|
|
559
|
-
|
|
553
|
+
(mutable_cf_options.ignore_max_compaction_bytes_for_input ||
|
|
554
|
+
output_level_inputs_size + expanded_inputs_size < limit) &&
|
|
560
555
|
!AreFilesInCompaction(expanded_inputs.files)) {
|
|
561
556
|
expand_inputs = true;
|
|
562
557
|
}
|
|
@@ -724,18 +719,18 @@ Compaction* CompactionPicker::CompactRange(
|
|
|
724
719
|
}
|
|
725
720
|
largest = &inputs[i]->largest;
|
|
726
721
|
|
|
727
|
-
uint64_t
|
|
722
|
+
uint64_t input_file_size = inputs[i]->fd.GetFileSize();
|
|
728
723
|
uint64_t output_level_total = 0;
|
|
729
724
|
if (output_level < vstorage->num_non_empty_levels()) {
|
|
730
725
|
std::vector<FileMetaData*> files;
|
|
731
726
|
vstorage->GetOverlappingInputsRangeBinarySearch(
|
|
732
727
|
output_level, smallest, largest, &files, hint_index, &hint_index);
|
|
733
728
|
for (const auto& file : files) {
|
|
734
|
-
output_level_total += file->
|
|
729
|
+
output_level_total += file->fd.GetFileSize();
|
|
735
730
|
}
|
|
736
731
|
}
|
|
737
732
|
|
|
738
|
-
input_level_total +=
|
|
733
|
+
input_level_total += input_file_size;
|
|
739
734
|
|
|
740
735
|
if (input_level_total + output_level_total >= limit) {
|
|
741
736
|
covering_the_whole_range = false;
|