@nxtedition/rocksdb 13.5.13 → 15.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +55 -180
- package/binding.gyp +2 -2
- package/chained-batch.js +9 -16
- package/deps/rocksdb/rocksdb/BUCK +18 -1
- package/deps/rocksdb/rocksdb/CMakeLists.txt +10 -3
- package/deps/rocksdb/rocksdb/Makefile +20 -9
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +90 -13
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +88 -75
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +44 -36
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +184 -148
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +5 -11
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +116 -47
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +3 -6
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +1 -1
- package/deps/rocksdb/rocksdb/db/builder.cc +4 -2
- package/deps/rocksdb/rocksdb/db/c.cc +207 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +72 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +3 -2
- package/deps/rocksdb/rocksdb/db/column_family.h +5 -0
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +51 -38
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +29 -12
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +5 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +566 -366
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +131 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +4 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +13 -14
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +12 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +97 -76
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +11 -14
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +8 -0
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +16 -3
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +448 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +22 -20
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +4 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +5 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +7 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_iter.cc +104 -0
- package/deps/rocksdb/rocksdb/db/db_iter.h +4 -11
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +331 -58
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +129 -0
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +64 -0
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +40 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +25 -15
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +42 -24
- package/deps/rocksdb/rocksdb/db/db_test_util.h +29 -14
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +69 -36
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +5 -4
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +8 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +275 -79
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +23 -5
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +591 -175
- package/deps/rocksdb/rocksdb/db/flush_job.cc +3 -4
- package/deps/rocksdb/rocksdb/db/log_reader.cc +5 -2
- package/deps/rocksdb/rocksdb/db/memtable.cc +84 -35
- package/deps/rocksdb/rocksdb/db/memtable.h +39 -34
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -0
- package/deps/rocksdb/rocksdb/db/merge_operator.cc +1 -1
- package/deps/rocksdb/rocksdb/db/multi_scan.cc +11 -5
- package/deps/rocksdb/rocksdb/db/version_edit.cc +1 -1
- package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +34 -14
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +28 -5
- package/deps/rocksdb/rocksdb/db/version_set.cc +159 -14
- package/deps/rocksdb/rocksdb/db/version_set.h +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +60 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +16 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.h +75 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.cc +28 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.h +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +50 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +57 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +0 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +266 -35
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +0 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +18 -2
- package/deps/rocksdb/rocksdb/env/env.cc +12 -0
- package/deps/rocksdb/rocksdb/env/env_test.cc +18 -0
- package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +2 -0
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +9 -5
- package/deps/rocksdb/rocksdb/env/io_posix.cc +4 -2
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +19 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +33 -31
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +42 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +93 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +43 -49
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +4 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/compression_type.h +8 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +487 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +11 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +135 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +12 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/ldb_tool.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +12 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +19 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +219 -24
- package/deps/rocksdb/rocksdb/include/rocksdb/point_lock_bench_tool.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +16 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +16 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +0 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +45 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +6 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/memory/memory_allocator_impl.h +3 -3
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +77 -51
- package/deps/rocksdb/rocksdb/memtable/skiplist.h +10 -13
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +16 -7
- package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +9 -4
- package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +2 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +6 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -1
- package/deps/rocksdb/rocksdb/options/cf_options.h +6 -2
- package/deps/rocksdb/rocksdb/options/options.cc +2 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +9 -8
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -5
- package/deps/rocksdb/rocksdb/port/mmap.cc +1 -1
- package/deps/rocksdb/rocksdb/port/win/xpress_win.cc +51 -0
- package/deps/rocksdb/rocksdb/port/win/xpress_win.h +4 -0
- package/deps/rocksdb/rocksdb/src.mk +8 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1125 -765
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +35 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +29 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +732 -256
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +225 -16
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +102 -26
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +2 -75
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +433 -141
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +17 -10
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy_impl.h +20 -0
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +112 -85
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +191 -36
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +2 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +108 -31
- package/deps/rocksdb/rocksdb/table/external_table.cc +7 -3
- package/deps/rocksdb/rocksdb/table/format.cc +6 -12
- package/deps/rocksdb/rocksdb/table/format.h +10 -0
- package/deps/rocksdb/rocksdb/table/internal_iterator.h +1 -1
- package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +1 -1
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -1
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +5 -0
- package/deps/rocksdb/rocksdb/table/multiget_context.h +3 -1
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +118 -46
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +9 -8
- package/deps/rocksdb/rocksdb/table/table_builder.h +5 -0
- package/deps/rocksdb/rocksdb/table/table_properties.cc +16 -0
- package/deps/rocksdb/rocksdb/table/table_test.cc +1540 -155
- package/deps/rocksdb/rocksdb/test_util/testutil.h +21 -5
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +26 -5
- package/deps/rocksdb/rocksdb/tools/ldb.cc +1 -2
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +2 -0
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -3
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +133 -165
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +173 -64
- package/deps/rocksdb/rocksdb/util/aligned_buffer.h +69 -0
- package/deps/rocksdb/rocksdb/util/atomic.h +6 -0
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +29 -20
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +10 -6
- package/deps/rocksdb/rocksdb/util/bit_fields.h +338 -0
- package/deps/rocksdb/rocksdb/util/coding.h +3 -3
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -2
- package/deps/rocksdb/rocksdb/util/compression.cc +777 -82
- package/deps/rocksdb/rocksdb/util/compression.h +5 -0
- package/deps/rocksdb/rocksdb/util/compression_test.cc +5 -3
- package/deps/rocksdb/rocksdb/util/dynamic_bloom.cc +2 -2
- package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +15 -14
- package/deps/rocksdb/rocksdb/util/interval_test.cc +102 -0
- package/deps/rocksdb/rocksdb/util/semaphore.h +164 -0
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +10 -6
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -2
- package/deps/rocksdb/rocksdb/util/slice_test.cc +136 -0
- package/deps/rocksdb/rocksdb/util/status.cc +1 -0
- package/deps/rocksdb/rocksdb/util/string_util.cc +2 -16
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +7 -4
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +35 -14
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc +5 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/any_lock_manager_test.h +244 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench.cc +18 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench_tool.cc +159 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +1244 -161
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +66 -12
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_stress_test.cc +103 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +1275 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +40 -262
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test_common.h +78 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_validation_test_runner.h +469 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +4 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +9 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +18 -9
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +2 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +72 -44
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +92 -15
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +6 -20
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +143 -112
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +23 -16
- package/index.js +18 -42
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/util.h +38 -12
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.cc +0 -17
|
@@ -170,7 +170,11 @@ CompactionJob::CompactionJob(
|
|
|
170
170
|
blob_output_directory_(blob_output_directory),
|
|
171
171
|
db_mutex_(db_mutex),
|
|
172
172
|
db_error_handler_(db_error_handler),
|
|
173
|
-
|
|
173
|
+
// job_context cannot be nullptr, but we will assert later in the body of
|
|
174
|
+
// the constructor.
|
|
175
|
+
earliest_snapshot_(job_context
|
|
176
|
+
? job_context->GetEarliestSnapshotSequence()
|
|
177
|
+
: kMaxSequenceNumber),
|
|
174
178
|
job_context_(job_context),
|
|
175
179
|
table_cache_(std::move(table_cache)),
|
|
176
180
|
event_logger_(event_logger),
|
|
@@ -185,6 +189,7 @@ CompactionJob::CompactionJob(
|
|
|
185
189
|
bg_bottom_compaction_scheduled_(bg_bottom_compaction_scheduled) {
|
|
186
190
|
assert(job_stats_ != nullptr);
|
|
187
191
|
assert(log_buffer_ != nullptr);
|
|
192
|
+
assert(job_context);
|
|
188
193
|
assert(job_context->snapshot_context_initialized);
|
|
189
194
|
|
|
190
195
|
const auto* cfd = compact_->compaction->column_family_data();
|
|
@@ -668,16 +673,17 @@ void CompactionJob::GenSubcompactionBoundaries() {
|
|
|
668
673
|
extra_num_subcompaction_threads_reserved_));
|
|
669
674
|
}
|
|
670
675
|
|
|
671
|
-
|
|
676
|
+
void CompactionJob::InitializeCompactionRun() {
|
|
672
677
|
AutoThreadOperationStageUpdater stage_updater(
|
|
673
678
|
ThreadStatus::STAGE_COMPACTION_RUN);
|
|
674
679
|
TEST_SYNC_POINT("CompactionJob::Run():Start");
|
|
675
680
|
log_buffer_->FlushBufferToLog();
|
|
676
681
|
LogCompaction();
|
|
682
|
+
}
|
|
677
683
|
|
|
684
|
+
void CompactionJob::RunSubcompactions() {
|
|
678
685
|
const size_t num_threads = compact_->sub_compact_states.size();
|
|
679
686
|
assert(num_threads > 0);
|
|
680
|
-
const uint64_t start_micros = db_options_.clock->NowMicros();
|
|
681
687
|
compact_->compaction->GetOrInitInputTableProperties();
|
|
682
688
|
|
|
683
689
|
// Launch a thread for each of subcompactions 1...num_threads-1
|
|
@@ -696,25 +702,43 @@ Status CompactionJob::Run() {
|
|
|
696
702
|
for (auto& thread : thread_pool) {
|
|
697
703
|
thread.join();
|
|
698
704
|
}
|
|
705
|
+
RemoveEmptyOutputs();
|
|
706
|
+
|
|
707
|
+
ReleaseSubcompactionResources();
|
|
708
|
+
TEST_SYNC_POINT("CompactionJob::ReleaseSubcompactionResources");
|
|
709
|
+
}
|
|
699
710
|
|
|
711
|
+
void CompactionJob::UpdateTimingStats(uint64_t start_micros) {
|
|
700
712
|
internal_stats_.SetMicros(db_options_.clock->NowMicros() - start_micros);
|
|
701
713
|
|
|
702
714
|
for (auto& state : compact_->sub_compact_states) {
|
|
703
715
|
internal_stats_.AddCpuMicros(state.compaction_job_stats.cpu_micros);
|
|
704
|
-
state.RemoveLastEmptyOutput();
|
|
705
716
|
}
|
|
706
717
|
|
|
707
718
|
RecordTimeToHistogram(stats_, COMPACTION_TIME,
|
|
708
719
|
internal_stats_.output_level_stats.micros);
|
|
709
720
|
RecordTimeToHistogram(stats_, COMPACTION_CPU_TIME,
|
|
710
721
|
internal_stats_.output_level_stats.cpu_micros);
|
|
722
|
+
}
|
|
711
723
|
|
|
712
|
-
|
|
724
|
+
void CompactionJob::RemoveEmptyOutputs() {
|
|
725
|
+
for (auto& state : compact_->sub_compact_states) {
|
|
726
|
+
state.RemoveLastEmptyOutput();
|
|
727
|
+
}
|
|
728
|
+
}
|
|
729
|
+
|
|
730
|
+
bool CompactionJob::HasNewBlobFiles() const {
|
|
731
|
+
for (const auto& state : compact_->sub_compact_states) {
|
|
732
|
+
if (state.Current().HasBlobFileAdditions()) {
|
|
733
|
+
return true;
|
|
734
|
+
}
|
|
735
|
+
}
|
|
736
|
+
return false;
|
|
737
|
+
}
|
|
713
738
|
|
|
714
|
-
|
|
739
|
+
Status CompactionJob::CollectSubcompactionErrors() {
|
|
715
740
|
Status status;
|
|
716
741
|
IOStatus io_s;
|
|
717
|
-
bool wrote_new_blob_files = false;
|
|
718
742
|
|
|
719
743
|
for (const auto& state : compact_->sub_compact_states) {
|
|
720
744
|
if (!state.status.ok()) {
|
|
@@ -722,125 +746,131 @@ Status CompactionJob::Run() {
|
|
|
722
746
|
io_s = state.io_status;
|
|
723
747
|
break;
|
|
724
748
|
}
|
|
725
|
-
|
|
726
|
-
if (state.Current().HasBlobFileAdditions()) {
|
|
727
|
-
wrote_new_blob_files = true;
|
|
728
|
-
}
|
|
729
749
|
}
|
|
730
750
|
|
|
731
751
|
if (io_status_.ok()) {
|
|
732
752
|
io_status_ = io_s;
|
|
733
753
|
}
|
|
734
|
-
if (status.ok()) {
|
|
735
|
-
constexpr IODebugContext* dbg = nullptr;
|
|
736
754
|
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
IOOptions(), dbg,
|
|
740
|
-
DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
|
|
741
|
-
}
|
|
755
|
+
return status;
|
|
756
|
+
}
|
|
742
757
|
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
758
|
+
Status CompactionJob::SyncOutputDirectories() {
|
|
759
|
+
Status status;
|
|
760
|
+
IOStatus io_s;
|
|
761
|
+
constexpr IODebugContext* dbg = nullptr;
|
|
762
|
+
const bool wrote_new_blob_files = HasNewBlobFiles();
|
|
763
|
+
if (output_directory_) {
|
|
764
|
+
io_s = output_directory_->FsyncWithDirOptions(
|
|
765
|
+
IOOptions(), dbg,
|
|
766
|
+
DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
|
|
749
767
|
}
|
|
768
|
+
|
|
769
|
+
if (io_s.ok() && wrote_new_blob_files && blob_output_directory_ &&
|
|
770
|
+
blob_output_directory_ != output_directory_) {
|
|
771
|
+
io_s = blob_output_directory_->FsyncWithDirOptions(
|
|
772
|
+
IOOptions(), dbg,
|
|
773
|
+
DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
|
|
774
|
+
}
|
|
775
|
+
|
|
750
776
|
if (io_status_.ok()) {
|
|
751
777
|
io_status_ = io_s;
|
|
752
778
|
}
|
|
753
779
|
if (status.ok()) {
|
|
754
780
|
status = io_s;
|
|
755
781
|
}
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
782
|
+
|
|
783
|
+
return status;
|
|
784
|
+
}
|
|
785
|
+
|
|
786
|
+
Status CompactionJob::VerifyOutputFiles() {
|
|
787
|
+
Status status;
|
|
788
|
+
std::vector<port::Thread> thread_pool;
|
|
789
|
+
std::vector<const CompactionOutputs::Output*> files_output;
|
|
790
|
+
for (const auto& state : compact_->sub_compact_states) {
|
|
791
|
+
for (const auto& output : state.GetOutputs()) {
|
|
792
|
+
files_output.emplace_back(&output);
|
|
763
793
|
}
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
}
|
|
806
|
-
}
|
|
807
|
-
if (s.ok()) {
|
|
808
|
-
s = iter->status();
|
|
809
|
-
}
|
|
810
|
-
if (s.ok() &&
|
|
811
|
-
!validator.CompareValidator(files_output[file_idx]->validator)) {
|
|
812
|
-
s = Status::Corruption("Paranoid checksums do not match");
|
|
794
|
+
}
|
|
795
|
+
ColumnFamilyData* cfd = compact_->compaction->column_family_data();
|
|
796
|
+
std::atomic<size_t> next_file_idx(0);
|
|
797
|
+
auto verify_table = [&](Status& output_status) {
|
|
798
|
+
while (true) {
|
|
799
|
+
size_t file_idx = next_file_idx.fetch_add(1);
|
|
800
|
+
if (file_idx >= files_output.size()) {
|
|
801
|
+
break;
|
|
802
|
+
}
|
|
803
|
+
// Verify that the table is usable
|
|
804
|
+
// We set for_compaction to false and don't
|
|
805
|
+
// OptimizeForCompactionTableRead here because this is a special case
|
|
806
|
+
// after we finish the table building No matter whether
|
|
807
|
+
// use_direct_io_for_flush_and_compaction is true, we will regard this
|
|
808
|
+
// verification as user reads since the goal is to cache it here for
|
|
809
|
+
// further user reads
|
|
810
|
+
ReadOptions verify_table_read_options(Env::IOActivity::kCompaction);
|
|
811
|
+
verify_table_read_options.rate_limiter_priority =
|
|
812
|
+
GetRateLimiterPriority();
|
|
813
|
+
InternalIterator* iter = cfd->table_cache()->NewIterator(
|
|
814
|
+
verify_table_read_options, file_options_, cfd->internal_comparator(),
|
|
815
|
+
files_output[file_idx]->meta,
|
|
816
|
+
/*range_del_agg=*/nullptr, compact_->compaction->mutable_cf_options(),
|
|
817
|
+
/*table_reader_ptr=*/nullptr,
|
|
818
|
+
cfd->internal_stats()->GetFileReadHist(
|
|
819
|
+
compact_->compaction->output_level()),
|
|
820
|
+
TableReaderCaller::kCompactionRefill, /*arena=*/nullptr,
|
|
821
|
+
/*skip_filters=*/false, compact_->compaction->output_level(),
|
|
822
|
+
MaxFileSizeForL0MetaPin(compact_->compaction->mutable_cf_options()),
|
|
823
|
+
/*smallest_compaction_key=*/nullptr,
|
|
824
|
+
/*largest_compaction_key=*/nullptr,
|
|
825
|
+
/*allow_unprepared_value=*/false);
|
|
826
|
+
auto s = iter->status();
|
|
827
|
+
|
|
828
|
+
if (s.ok() && paranoid_file_checks_) {
|
|
829
|
+
OutputValidator validator(cfd->internal_comparator(),
|
|
830
|
+
/*_enable_hash=*/true);
|
|
831
|
+
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
|
832
|
+
s = validator.Add(iter->key(), iter->value());
|
|
833
|
+
if (!s.ok()) {
|
|
834
|
+
break;
|
|
813
835
|
}
|
|
814
836
|
}
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
if (
|
|
819
|
-
|
|
820
|
-
|
|
837
|
+
if (s.ok()) {
|
|
838
|
+
s = iter->status();
|
|
839
|
+
}
|
|
840
|
+
if (s.ok() &&
|
|
841
|
+
!validator.CompareValidator(files_output[file_idx]->validator)) {
|
|
842
|
+
s = Status::Corruption("Paranoid checksums do not match");
|
|
821
843
|
}
|
|
822
844
|
}
|
|
823
|
-
};
|
|
824
|
-
for (size_t i = 1; i < compact_->sub_compact_states.size(); i++) {
|
|
825
|
-
thread_pool.emplace_back(
|
|
826
|
-
verify_table, std::ref(compact_->sub_compact_states[i].status));
|
|
827
|
-
}
|
|
828
|
-
verify_table(compact_->sub_compact_states[0].status);
|
|
829
|
-
for (auto& thread : thread_pool) {
|
|
830
|
-
thread.join();
|
|
831
|
-
}
|
|
832
845
|
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
846
|
+
delete iter;
|
|
847
|
+
|
|
848
|
+
if (!s.ok()) {
|
|
849
|
+
output_status = s;
|
|
836
850
|
break;
|
|
837
851
|
}
|
|
838
852
|
}
|
|
853
|
+
};
|
|
854
|
+
for (size_t i = 1; i < compact_->sub_compact_states.size(); i++) {
|
|
855
|
+
thread_pool.emplace_back(verify_table,
|
|
856
|
+
std::ref(compact_->sub_compact_states[i].status));
|
|
857
|
+
}
|
|
858
|
+
verify_table(compact_->sub_compact_states[0].status);
|
|
859
|
+
for (auto& thread : thread_pool) {
|
|
860
|
+
thread.join();
|
|
839
861
|
}
|
|
840
862
|
|
|
841
|
-
|
|
842
|
-
|
|
863
|
+
for (const auto& state : compact_->sub_compact_states) {
|
|
864
|
+
if (!state.status.ok()) {
|
|
865
|
+
status = state.status;
|
|
866
|
+
break;
|
|
867
|
+
}
|
|
868
|
+
}
|
|
843
869
|
|
|
870
|
+
return status;
|
|
871
|
+
}
|
|
872
|
+
|
|
873
|
+
void CompactionJob::SetOutputTableProperties() {
|
|
844
874
|
for (const auto& state : compact_->sub_compact_states) {
|
|
845
875
|
for (const auto& output : state.GetOutputs()) {
|
|
846
876
|
auto fn =
|
|
@@ -850,7 +880,9 @@ Status CompactionJob::Run() {
|
|
|
850
880
|
output.table_properties);
|
|
851
881
|
}
|
|
852
882
|
}
|
|
883
|
+
}
|
|
853
884
|
|
|
885
|
+
void CompactionJob::AggregateSubcompactionOutputAndJobStats() {
|
|
854
886
|
// Before the compaction starts, is_remote_compaction was set to true if
|
|
855
887
|
// compaction_service is set. We now know whether each sub_compaction was
|
|
856
888
|
// done remotely or not. Reset is_remote_compaction back to false and allow
|
|
@@ -859,68 +891,88 @@ Status CompactionJob::Run() {
|
|
|
859
891
|
|
|
860
892
|
// Finish up all bookkeeping to unify the subcompaction results.
|
|
861
893
|
compact_->AggregateCompactionStats(internal_stats_, *job_stats_);
|
|
894
|
+
}
|
|
862
895
|
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
ROCKS_LOG_WARN(
|
|
872
|
-
db_options_.info_log, "[%s] [JOB %d] Compaction with status: %s",
|
|
873
|
-
compact_->compaction->column_family_data()->GetName().c_str(),
|
|
874
|
-
job_context_->job_id, status.ToString().c_str());
|
|
875
|
-
}
|
|
896
|
+
Status CompactionJob::VerifyCompactionRecordCounts(
|
|
897
|
+
bool stats_built_from_input_table_prop, uint64_t num_input_range_del) {
|
|
898
|
+
Status status;
|
|
899
|
+
if (stats_built_from_input_table_prop &&
|
|
900
|
+
job_stats_->has_accurate_num_input_records) {
|
|
901
|
+
status = VerifyInputRecordCount(num_input_range_del);
|
|
902
|
+
if (!status.ok()) {
|
|
903
|
+
return status;
|
|
876
904
|
}
|
|
877
|
-
UpdateCompactionJobInputStats(internal_stats_, num_input_range_del);
|
|
878
905
|
}
|
|
879
|
-
UpdateCompactionJobOutputStats(internal_stats_);
|
|
880
906
|
|
|
881
|
-
// Verify number of output records
|
|
882
|
-
// Only verify on table with format collects table properties
|
|
883
907
|
const auto& mutable_cf_options = compact_->compaction->mutable_cf_options();
|
|
884
|
-
if (
|
|
885
|
-
(mutable_cf_options.table_factory->IsInstanceOf(
|
|
908
|
+
if ((mutable_cf_options.table_factory->IsInstanceOf(
|
|
886
909
|
TableFactory::kBlockBasedTableName()) ||
|
|
887
910
|
mutable_cf_options.table_factory->IsInstanceOf(
|
|
888
|
-
TableFactory::kPlainTableName()))
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
for (const auto& output : state.GetOutputs()) {
|
|
893
|
-
total_output_num += output.table_properties->num_entries -
|
|
894
|
-
output.table_properties->num_range_deletions;
|
|
895
|
-
}
|
|
896
|
-
}
|
|
897
|
-
|
|
898
|
-
uint64_t expected = internal_stats_.output_level_stats.num_output_records;
|
|
899
|
-
if (internal_stats_.has_proximal_level_output) {
|
|
900
|
-
expected += internal_stats_.proximal_level_stats.num_output_records;
|
|
901
|
-
}
|
|
902
|
-
if (expected != total_output_num) {
|
|
903
|
-
char scratch[2345];
|
|
904
|
-
compact_->compaction->Summary(scratch, sizeof(scratch));
|
|
905
|
-
std::string msg =
|
|
906
|
-
"Number of keys in compaction output SST files does not match "
|
|
907
|
-
"number of keys added. Expected " +
|
|
908
|
-
std::to_string(expected) + " but there are " +
|
|
909
|
-
std::to_string(total_output_num) +
|
|
910
|
-
" in output SST files. Compaction summary: " + scratch;
|
|
911
|
-
ROCKS_LOG_WARN(
|
|
912
|
-
db_options_.info_log, "[%s] [JOB %d] Compaction with status: %s",
|
|
913
|
-
compact_->compaction->column_family_data()->GetName().c_str(),
|
|
914
|
-
job_context_->job_id, msg.c_str());
|
|
915
|
-
status = Status::Corruption(msg);
|
|
911
|
+
TableFactory::kPlainTableName()))) {
|
|
912
|
+
status = VerifyOutputRecordCount();
|
|
913
|
+
if (!status.ok()) {
|
|
914
|
+
return status;
|
|
916
915
|
}
|
|
917
916
|
}
|
|
917
|
+
return status;
|
|
918
|
+
}
|
|
918
919
|
|
|
920
|
+
void CompactionJob::FinalizeCompactionRun(
|
|
921
|
+
const Status& input_status, bool stats_built_from_input_table_prop,
|
|
922
|
+
uint64_t num_input_range_del) {
|
|
923
|
+
if (stats_built_from_input_table_prop) {
|
|
924
|
+
UpdateCompactionJobInputStatsFromInternalStats(internal_stats_,
|
|
925
|
+
num_input_range_del);
|
|
926
|
+
}
|
|
927
|
+
UpdateCompactionJobOutputStatsFromInternalStats(internal_stats_);
|
|
919
928
|
RecordCompactionIOStats();
|
|
929
|
+
|
|
920
930
|
LogFlush(db_options_.info_log);
|
|
921
931
|
TEST_SYNC_POINT("CompactionJob::Run():End");
|
|
922
|
-
compact_->status =
|
|
923
|
-
TEST_SYNC_POINT_CALLBACK("CompactionJob::Run():EndStatusSet",
|
|
932
|
+
compact_->status = input_status;
|
|
933
|
+
TEST_SYNC_POINT_CALLBACK("CompactionJob::Run():EndStatusSet",
|
|
934
|
+
const_cast<Status*>(&input_status));
|
|
935
|
+
}
|
|
936
|
+
|
|
937
|
+
Status CompactionJob::Run() {
|
|
938
|
+
InitializeCompactionRun();
|
|
939
|
+
|
|
940
|
+
const uint64_t start_micros = db_options_.clock->NowMicros();
|
|
941
|
+
|
|
942
|
+
RunSubcompactions();
|
|
943
|
+
|
|
944
|
+
UpdateTimingStats(start_micros);
|
|
945
|
+
|
|
946
|
+
TEST_SYNC_POINT("CompactionJob::Run:BeforeVerify");
|
|
947
|
+
|
|
948
|
+
Status status = CollectSubcompactionErrors();
|
|
949
|
+
|
|
950
|
+
if (status.ok()) {
|
|
951
|
+
status = SyncOutputDirectories();
|
|
952
|
+
}
|
|
953
|
+
|
|
954
|
+
if (status.ok()) {
|
|
955
|
+
status = VerifyOutputFiles();
|
|
956
|
+
}
|
|
957
|
+
|
|
958
|
+
if (status.ok()) {
|
|
959
|
+
SetOutputTableProperties();
|
|
960
|
+
}
|
|
961
|
+
|
|
962
|
+
AggregateSubcompactionOutputAndJobStats();
|
|
963
|
+
|
|
964
|
+
uint64_t num_input_range_del = 0;
|
|
965
|
+
bool stats_built_from_input_table_prop =
|
|
966
|
+
UpdateInternalStatsFromInputFiles(&num_input_range_del);
|
|
967
|
+
|
|
968
|
+
if (status.ok()) {
|
|
969
|
+
status = VerifyCompactionRecordCounts(stats_built_from_input_table_prop,
|
|
970
|
+
num_input_range_del);
|
|
971
|
+
}
|
|
972
|
+
|
|
973
|
+
FinalizeCompactionRun(status, stats_built_from_input_table_prop,
|
|
974
|
+
num_input_range_del);
|
|
975
|
+
|
|
924
976
|
return status;
|
|
925
977
|
}
|
|
926
978
|
|
|
@@ -1144,58 +1196,62 @@ void CompactionJob::NotifyOnSubcompactionCompleted(
|
|
|
1144
1196
|
}
|
|
1145
1197
|
}
|
|
1146
1198
|
|
|
1147
|
-
|
|
1148
|
-
assert(sub_compact);
|
|
1149
|
-
assert(sub_compact->compaction);
|
|
1199
|
+
bool CompactionJob::ShouldUseLocalCompaction(SubcompactionState* sub_compact) {
|
|
1150
1200
|
if (db_options_.compaction_service) {
|
|
1151
1201
|
CompactionServiceJobStatus comp_status =
|
|
1152
1202
|
ProcessKeyValueCompactionWithCompactionService(sub_compact);
|
|
1153
1203
|
if (comp_status != CompactionServiceJobStatus::kUseLocal) {
|
|
1154
|
-
return;
|
|
1204
|
+
return false;
|
|
1155
1205
|
}
|
|
1156
1206
|
// fallback to local compaction
|
|
1157
1207
|
assert(comp_status == CompactionServiceJobStatus::kUseLocal);
|
|
1158
1208
|
sub_compact->compaction_job_stats.is_remote_compaction = false;
|
|
1159
1209
|
}
|
|
1210
|
+
return true;
|
|
1211
|
+
}
|
|
1160
1212
|
|
|
1161
|
-
|
|
1213
|
+
CompactionJob::CompactionIOStatsSnapshot CompactionJob::InitializeIOStats() {
|
|
1214
|
+
CompactionIOStatsSnapshot io_stats;
|
|
1162
1215
|
|
|
1163
|
-
|
|
1216
|
+
if (measure_io_stats_) {
|
|
1217
|
+
io_stats.prev_perf_level = GetPerfLevel();
|
|
1218
|
+
SetPerfLevel(PerfLevel::kEnableTimeAndCPUTimeExceptForMutex);
|
|
1219
|
+
io_stats.prev_write_nanos = IOSTATS(write_nanos);
|
|
1220
|
+
io_stats.prev_fsync_nanos = IOSTATS(fsync_nanos);
|
|
1221
|
+
io_stats.prev_range_sync_nanos = IOSTATS(range_sync_nanos);
|
|
1222
|
+
io_stats.prev_prepare_write_nanos = IOSTATS(prepare_write_nanos);
|
|
1223
|
+
io_stats.prev_cpu_write_nanos = IOSTATS(cpu_write_nanos);
|
|
1224
|
+
io_stats.prev_cpu_read_nanos = IOSTATS(cpu_read_nanos);
|
|
1225
|
+
}
|
|
1226
|
+
|
|
1227
|
+
return io_stats;
|
|
1228
|
+
}
|
|
1229
|
+
|
|
1230
|
+
Status CompactionJob::SetupAndValidateCompactionFilter(
|
|
1231
|
+
SubcompactionState* sub_compact,
|
|
1232
|
+
const CompactionFilter* configured_compaction_filter,
|
|
1233
|
+
const CompactionFilter*& compaction_filter,
|
|
1234
|
+
std::unique_ptr<CompactionFilter>& compaction_filter_from_factory) {
|
|
1235
|
+
compaction_filter = configured_compaction_filter;
|
|
1164
1236
|
|
|
1165
|
-
// Create compaction filter and fail the compaction if
|
|
1166
|
-
// IgnoreSnapshots() = false because it is not supported anymore
|
|
1167
|
-
const CompactionFilter* compaction_filter = cfd->ioptions().compaction_filter;
|
|
1168
|
-
std::unique_ptr<CompactionFilter> compaction_filter_from_factory = nullptr;
|
|
1169
1237
|
if (compaction_filter == nullptr) {
|
|
1170
1238
|
compaction_filter_from_factory =
|
|
1171
1239
|
sub_compact->compaction->CreateCompactionFilter();
|
|
1172
1240
|
compaction_filter = compaction_filter_from_factory.get();
|
|
1173
1241
|
}
|
|
1242
|
+
|
|
1174
1243
|
if (compaction_filter != nullptr && !compaction_filter->IgnoreSnapshots()) {
|
|
1175
|
-
|
|
1244
|
+
return Status::NotSupported(
|
|
1176
1245
|
"CompactionFilter::IgnoreSnapshots() = false is not supported "
|
|
1177
1246
|
"anymore.");
|
|
1178
|
-
return;
|
|
1179
1247
|
}
|
|
1180
1248
|
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
// This is assigned after creation of SubcompactionState to simplify that
|
|
1184
|
-
// creation across both CompactionJob and CompactionServiceCompactionJob
|
|
1185
|
-
sub_compact->AssignRangeDelAggregator(
|
|
1186
|
-
std::make_unique<CompactionRangeDelAggregator>(
|
|
1187
|
-
&cfd->internal_comparator(), job_context_->snapshot_seqs,
|
|
1188
|
-
&full_history_ts_low_, &trim_ts_));
|
|
1189
|
-
|
|
1190
|
-
// TODO: since we already use C++17, should use
|
|
1191
|
-
// std::optional<const Slice> instead.
|
|
1192
|
-
const std::optional<Slice> start = sub_compact->start;
|
|
1193
|
-
const std::optional<Slice> end = sub_compact->end;
|
|
1194
|
-
|
|
1195
|
-
std::optional<Slice> start_without_ts;
|
|
1196
|
-
std::optional<Slice> end_without_ts;
|
|
1249
|
+
return Status::OK();
|
|
1250
|
+
}
|
|
1197
1251
|
|
|
1198
|
-
|
|
1252
|
+
void CompactionJob::InitializeReadOptions(
|
|
1253
|
+
ColumnFamilyData* cfd, ReadOptions& read_options,
|
|
1254
|
+
SubcompactionKeyBoundaries& boundaries) {
|
|
1199
1255
|
read_options.verify_checksums = true;
|
|
1200
1256
|
read_options.fill_cache = false;
|
|
1201
1257
|
read_options.rate_limiter_priority = GetRateLimiterPriority();
|
|
@@ -1206,223 +1262,207 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
|
|
1206
1262
|
// (b) CompactionFilter::Decision::kRemoveAndSkipUntil.
|
|
1207
1263
|
read_options.total_order_seek = true;
|
|
1208
1264
|
|
|
1209
|
-
const WriteOptions write_options(Env::IOPriority::IO_LOW,
|
|
1210
|
-
Env::IOActivity::kCompaction);
|
|
1211
|
-
|
|
1212
1265
|
// Remove the timestamps from boundaries because boundaries created in
|
|
1213
1266
|
// GenSubcompactionBoundaries doesn't strip away the timestamp.
|
|
1214
|
-
size_t ts_sz = cfd->user_comparator()->timestamp_size();
|
|
1215
|
-
|
|
1216
|
-
|
|
1267
|
+
const size_t ts_sz = cfd->user_comparator()->timestamp_size();
|
|
1268
|
+
|
|
1269
|
+
if (boundaries.start.has_value()) {
|
|
1270
|
+
read_options.iterate_lower_bound = &(*boundaries.start);
|
|
1217
1271
|
if (ts_sz > 0) {
|
|
1218
|
-
start_without_ts =
|
|
1219
|
-
|
|
1272
|
+
boundaries.start_without_ts =
|
|
1273
|
+
StripTimestampFromUserKey(*boundaries.start, ts_sz);
|
|
1274
|
+
read_options.iterate_lower_bound = &(*boundaries.start_without_ts);
|
|
1220
1275
|
}
|
|
1221
1276
|
}
|
|
1222
|
-
if (end.has_value()) {
|
|
1223
|
-
read_options.iterate_upper_bound = &(*end);
|
|
1277
|
+
if (boundaries.end.has_value()) {
|
|
1278
|
+
read_options.iterate_upper_bound = &(*boundaries.end);
|
|
1224
1279
|
if (ts_sz > 0) {
|
|
1225
|
-
end_without_ts =
|
|
1226
|
-
|
|
1280
|
+
boundaries.end_without_ts =
|
|
1281
|
+
StripTimestampFromUserKey(*boundaries.end, ts_sz);
|
|
1282
|
+
read_options.iterate_upper_bound = &(*boundaries.end_without_ts);
|
|
1227
1283
|
}
|
|
1228
1284
|
}
|
|
1285
|
+
}
|
|
1286
|
+
|
|
1287
|
+
InternalIterator* CompactionJob::CreateInputIterator(
|
|
1288
|
+
SubcompactionState* sub_compact, ColumnFamilyData* cfd,
|
|
1289
|
+
SubcompactionInternalIterators& iterators,
|
|
1290
|
+
SubcompactionKeyBoundaries& boundaries, ReadOptions& read_options) {
|
|
1291
|
+
// This is assigned after creation of SubcompactionState to simplify that
|
|
1292
|
+
// creation across both CompactionJob and CompactionServiceCompactionJob
|
|
1293
|
+
sub_compact->AssignRangeDelAggregator(
|
|
1294
|
+
std::make_unique<CompactionRangeDelAggregator>(
|
|
1295
|
+
&cfd->internal_comparator(), job_context_->snapshot_seqs,
|
|
1296
|
+
&full_history_ts_low_, &trim_ts_));
|
|
1297
|
+
|
|
1298
|
+
InitializeReadOptions(cfd, read_options, boundaries);
|
|
1229
1299
|
|
|
1230
1300
|
// Although the v2 aggregator is what the level iterator(s) know about,
|
|
1231
1301
|
// the AddTombstones calls will be propagated down to the v1 aggregator.
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
Slice start_slice;
|
|
1240
|
-
Slice end_slice;
|
|
1241
|
-
Slice start_user_key{};
|
|
1242
|
-
Slice end_user_key{};
|
|
1243
|
-
|
|
1244
|
-
static constexpr char kMaxTs[] =
|
|
1245
|
-
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff";
|
|
1246
|
-
Slice ts_slice;
|
|
1247
|
-
std::string max_ts;
|
|
1302
|
+
iterators.raw_input =
|
|
1303
|
+
std::unique_ptr<InternalIterator>(versions_->MakeInputIterator(
|
|
1304
|
+
read_options, sub_compact->compaction, sub_compact->RangeDelAgg(),
|
|
1305
|
+
file_options_for_read_, boundaries.start, boundaries.end));
|
|
1306
|
+
InternalIterator* input = iterators.raw_input.get();
|
|
1307
|
+
|
|
1308
|
+
const size_t ts_sz = cfd->user_comparator()->timestamp_size();
|
|
1248
1309
|
if (ts_sz > 0) {
|
|
1249
|
-
if (ts_sz <= strlen(kMaxTs)) {
|
|
1250
|
-
ts_slice = Slice(kMaxTs, ts_sz);
|
|
1310
|
+
if (ts_sz <= strlen(boundaries.kMaxTs)) {
|
|
1311
|
+
boundaries.ts_slice = Slice(boundaries.kMaxTs, ts_sz);
|
|
1251
1312
|
} else {
|
|
1252
|
-
max_ts = std::string(ts_sz, '\xff');
|
|
1253
|
-
ts_slice = Slice(max_ts);
|
|
1313
|
+
boundaries.max_ts = std::string(ts_sz, '\xff');
|
|
1314
|
+
boundaries.ts_slice = Slice(boundaries.max_ts);
|
|
1254
1315
|
}
|
|
1255
1316
|
}
|
|
1256
1317
|
|
|
1257
|
-
if (start.has_value()) {
|
|
1258
|
-
start_ikey.SetInternalKey(*start, kMaxSequenceNumber,
|
|
1318
|
+
if (boundaries.start.has_value()) {
|
|
1319
|
+
boundaries.start_ikey.SetInternalKey(*boundaries.start, kMaxSequenceNumber,
|
|
1320
|
+
kValueTypeForSeek);
|
|
1259
1321
|
if (ts_sz > 0) {
|
|
1260
|
-
start_ikey.UpdateInternalKey(
|
|
1261
|
-
|
|
1322
|
+
boundaries.start_ikey.UpdateInternalKey(
|
|
1323
|
+
kMaxSequenceNumber, kValueTypeForSeek, &boundaries.ts_slice);
|
|
1262
1324
|
}
|
|
1263
|
-
|
|
1264
|
-
start_user_key = start_ikey.GetUserKey();
|
|
1325
|
+
boundaries.start_internal_key = boundaries.start_ikey.GetInternalKey();
|
|
1326
|
+
boundaries.start_user_key = boundaries.start_ikey.GetUserKey();
|
|
1265
1327
|
}
|
|
1266
|
-
if (end.has_value()) {
|
|
1267
|
-
end_ikey.SetInternalKey(*end, kMaxSequenceNumber,
|
|
1328
|
+
if (boundaries.end.has_value()) {
|
|
1329
|
+
boundaries.end_ikey.SetInternalKey(*boundaries.end, kMaxSequenceNumber,
|
|
1330
|
+
kValueTypeForSeek);
|
|
1268
1331
|
if (ts_sz > 0) {
|
|
1269
|
-
end_ikey.UpdateInternalKey(
|
|
1270
|
-
|
|
1332
|
+
boundaries.end_ikey.UpdateInternalKey(
|
|
1333
|
+
kMaxSequenceNumber, kValueTypeForSeek, &boundaries.ts_slice);
|
|
1271
1334
|
}
|
|
1272
|
-
|
|
1273
|
-
end_user_key = end_ikey.GetUserKey();
|
|
1335
|
+
boundaries.end_internal_key = boundaries.end_ikey.GetInternalKey();
|
|
1336
|
+
boundaries.end_user_key = boundaries.end_ikey.GetUserKey();
|
|
1274
1337
|
}
|
|
1275
1338
|
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
end.has_value() ? &
|
|
1281
|
-
|
|
1339
|
+
if (boundaries.start.has_value() || boundaries.end.has_value()) {
|
|
1340
|
+
iterators.clip = std::make_unique<ClippingIterator>(
|
|
1341
|
+
iterators.raw_input.get(),
|
|
1342
|
+
boundaries.start.has_value() ? &boundaries.start_internal_key : nullptr,
|
|
1343
|
+
boundaries.end.has_value() ? &boundaries.end_internal_key : nullptr,
|
|
1344
|
+
&cfd->internal_comparator());
|
|
1345
|
+
input = iterators.clip.get();
|
|
1282
1346
|
}
|
|
1283
1347
|
|
|
1284
|
-
std::unique_ptr<InternalIterator> blob_counter;
|
|
1285
|
-
|
|
1286
1348
|
if (sub_compact->compaction->DoesInputReferenceBlobFiles()) {
|
|
1287
1349
|
BlobGarbageMeter* meter = sub_compact->Current().CreateBlobGarbageMeter();
|
|
1288
|
-
blob_counter =
|
|
1289
|
-
|
|
1350
|
+
iterators.blob_counter =
|
|
1351
|
+
std::make_unique<BlobCountingIterator>(input, meter);
|
|
1352
|
+
input = iterators.blob_counter.get();
|
|
1290
1353
|
}
|
|
1291
1354
|
|
|
1292
|
-
std::unique_ptr<InternalIterator> trim_history_iter;
|
|
1293
1355
|
if (ts_sz > 0 && !trim_ts_.empty()) {
|
|
1294
|
-
trim_history_iter = std::make_unique<HistoryTrimmingIterator>(
|
|
1356
|
+
iterators.trim_history_iter = std::make_unique<HistoryTrimmingIterator>(
|
|
1295
1357
|
input, cfd->user_comparator(), trim_ts_);
|
|
1296
|
-
input = trim_history_iter.get();
|
|
1358
|
+
input = iterators.trim_history_iter.get();
|
|
1297
1359
|
}
|
|
1298
1360
|
|
|
1299
|
-
input
|
|
1300
|
-
|
|
1301
|
-
AutoThreadOperationStageUpdater stage_updater(
|
|
1302
|
-
ThreadStatus::STAGE_COMPACTION_PROCESS_KV);
|
|
1303
|
-
|
|
1304
|
-
// I/O measurement variables
|
|
1305
|
-
PerfLevel prev_perf_level = PerfLevel::kEnableTime;
|
|
1306
|
-
const uint64_t kRecordStatsEvery = 1000;
|
|
1307
|
-
uint64_t prev_write_nanos = 0;
|
|
1308
|
-
uint64_t prev_fsync_nanos = 0;
|
|
1309
|
-
uint64_t prev_range_sync_nanos = 0;
|
|
1310
|
-
uint64_t prev_prepare_write_nanos = 0;
|
|
1311
|
-
uint64_t prev_cpu_write_nanos = 0;
|
|
1312
|
-
uint64_t prev_cpu_read_nanos = 0;
|
|
1313
|
-
if (measure_io_stats_) {
|
|
1314
|
-
prev_perf_level = GetPerfLevel();
|
|
1315
|
-
SetPerfLevel(PerfLevel::kEnableTimeAndCPUTimeExceptForMutex);
|
|
1316
|
-
prev_write_nanos = IOSTATS(write_nanos);
|
|
1317
|
-
prev_fsync_nanos = IOSTATS(fsync_nanos);
|
|
1318
|
-
prev_range_sync_nanos = IOSTATS(range_sync_nanos);
|
|
1319
|
-
prev_prepare_write_nanos = IOSTATS(prepare_write_nanos);
|
|
1320
|
-
prev_cpu_write_nanos = IOSTATS(cpu_write_nanos);
|
|
1321
|
-
prev_cpu_read_nanos = IOSTATS(cpu_read_nanos);
|
|
1322
|
-
}
|
|
1323
|
-
|
|
1324
|
-
MergeHelper merge(
|
|
1325
|
-
env_, cfd->user_comparator(), cfd->ioptions().merge_operator.get(),
|
|
1326
|
-
compaction_filter, db_options_.info_log.get(),
|
|
1327
|
-
false /* internal key corruption is expected */,
|
|
1328
|
-
job_context_->GetLatestSnapshotSequence(), job_context_->snapshot_checker,
|
|
1329
|
-
compact_->compaction->level(), db_options_.stats);
|
|
1361
|
+
return input;
|
|
1362
|
+
}
|
|
1330
1363
|
|
|
1364
|
+
void CompactionJob::CreateBlobFileBuilder(SubcompactionState* sub_compact,
|
|
1365
|
+
ColumnFamilyData* cfd,
|
|
1366
|
+
BlobFileResources& blob_resources,
|
|
1367
|
+
const WriteOptions& write_options) {
|
|
1331
1368
|
const auto& mutable_cf_options =
|
|
1332
1369
|
sub_compact->compaction->mutable_cf_options();
|
|
1333
1370
|
|
|
1334
|
-
std::vector<std::string> blob_file_paths;
|
|
1335
|
-
|
|
1336
1371
|
// TODO: BlobDB to support output_to_proximal_level compaction, which needs
|
|
1337
1372
|
// 2 builders, so may need to move to `CompactionOutputs`
|
|
1338
|
-
|
|
1339
|
-
(
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1373
|
+
if (mutable_cf_options.enable_blob_files &&
|
|
1374
|
+
sub_compact->compaction->output_level() >=
|
|
1375
|
+
mutable_cf_options.blob_file_starting_level) {
|
|
1376
|
+
blob_resources.blob_file_builder = std::make_unique<BlobFileBuilder>(
|
|
1377
|
+
versions_, fs_.get(), &sub_compact->compaction->immutable_options(),
|
|
1378
|
+
&mutable_cf_options, &file_options_, &write_options, db_id_,
|
|
1379
|
+
db_session_id_, job_id_, cfd->GetID(), cfd->GetName(), write_hint_,
|
|
1380
|
+
io_tracer_, blob_callback_, BlobFileCreationReason::kCompaction,
|
|
1381
|
+
&blob_resources.blob_file_paths,
|
|
1382
|
+
sub_compact->Current().GetBlobFileAdditionsPtr());
|
|
1383
|
+
} else {
|
|
1384
|
+
blob_resources.blob_file_builder = nullptr;
|
|
1385
|
+
}
|
|
1386
|
+
}
|
|
1351
1387
|
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1388
|
+
std::unique_ptr<CompactionIterator> CompactionJob::CreateCompactionIterator(
|
|
1389
|
+
SubcompactionState* sub_compact, ColumnFamilyData* cfd,
|
|
1390
|
+
InternalIterator* input, const CompactionFilter* compaction_filter,
|
|
1391
|
+
MergeHelper& merge, BlobFileResources& blob_resources,
|
|
1392
|
+
const WriteOptions& write_options) {
|
|
1393
|
+
CreateBlobFileBuilder(sub_compact, cfd, blob_resources, write_options);
|
|
1356
1394
|
|
|
1357
1395
|
const std::string* const full_history_ts_low =
|
|
1358
1396
|
full_history_ts_low_.empty() ? nullptr : &full_history_ts_low_;
|
|
1359
|
-
|
|
1360
|
-
job_context_ ? job_context_->GetJobSnapshotSequence()
|
|
1361
|
-
: kMaxSequenceNumber;
|
|
1397
|
+
assert(job_context_);
|
|
1362
1398
|
|
|
1363
|
-
|
|
1399
|
+
return std::make_unique<CompactionIterator>(
|
|
1364
1400
|
input, cfd->user_comparator(), &merge, versions_->LastSequence(),
|
|
1365
1401
|
&(job_context_->snapshot_seqs), earliest_snapshot_,
|
|
1366
|
-
job_context_->earliest_write_conflict_snapshot,
|
|
1367
|
-
job_context_->
|
|
1368
|
-
ShouldReportDetailedTime(env_, stats_),
|
|
1369
|
-
|
|
1370
|
-
blob_file_builder.get(), db_options_.allow_data_in_errors,
|
|
1402
|
+
job_context_->earliest_write_conflict_snapshot,
|
|
1403
|
+
job_context_->GetJobSnapshotSequence(), job_context_->snapshot_checker,
|
|
1404
|
+
env_, ShouldReportDetailedTime(env_, stats_), sub_compact->RangeDelAgg(),
|
|
1405
|
+
blob_resources.blob_file_builder.get(), db_options_.allow_data_in_errors,
|
|
1371
1406
|
db_options_.enforce_single_del_contracts, manual_compaction_canceled_,
|
|
1372
|
-
sub_compact->compaction
|
|
1373
|
-
->DoesInputReferenceBlobFiles() /* must_count_input_entries */,
|
|
1407
|
+
sub_compact->compaction->DoesInputReferenceBlobFiles(),
|
|
1374
1408
|
sub_compact->compaction, compaction_filter, shutting_down_,
|
|
1375
1409
|
db_options_.info_log, full_history_ts_low, preserve_seqno_after_);
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
const auto& c_iter_stats = c_iter->iter_stats();
|
|
1410
|
+
}
|
|
1379
1411
|
|
|
1380
|
-
|
|
1381
|
-
|
|
1412
|
+
std::pair<CompactionFileOpenFunc, CompactionFileCloseFunc>
|
|
1413
|
+
CompactionJob::CreateFileHandlers(SubcompactionState* sub_compact,
|
|
1414
|
+
SubcompactionKeyBoundaries& boundaries) {
|
|
1382
1415
|
const CompactionFileOpenFunc open_file_func =
|
|
1383
1416
|
[this, sub_compact](CompactionOutputs& outputs) {
|
|
1384
1417
|
return this->OpenCompactionOutputFile(sub_compact, outputs);
|
|
1385
1418
|
};
|
|
1386
1419
|
|
|
1420
|
+
const Slice* start_user_key =
|
|
1421
|
+
sub_compact->start.has_value() ? &boundaries.start_user_key : nullptr;
|
|
1422
|
+
const Slice* end_user_key =
|
|
1423
|
+
sub_compact->end.has_value() ? &boundaries.end_user_key : nullptr;
|
|
1424
|
+
|
|
1387
1425
|
const CompactionFileCloseFunc close_file_func =
|
|
1388
1426
|
[this, sub_compact, start_user_key, end_user_key](
|
|
1389
1427
|
CompactionOutputs& outputs, const Status& status,
|
|
1390
1428
|
const Slice& next_table_min_key) {
|
|
1391
|
-
return this->FinishCompactionOutputFile(
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
sub_compact->end.has_value() ? &end_user_key : nullptr);
|
|
1429
|
+
return this->FinishCompactionOutputFile(status, sub_compact, outputs,
|
|
1430
|
+
next_table_min_key,
|
|
1431
|
+
start_user_key, end_user_key);
|
|
1395
1432
|
};
|
|
1396
1433
|
|
|
1434
|
+
return {open_file_func, close_file_func};
|
|
1435
|
+
}
|
|
1436
|
+
|
|
1437
|
+
Status CompactionJob::ProcessKeyValue(
|
|
1438
|
+
SubcompactionState* sub_compact, ColumnFamilyData* cfd,
|
|
1439
|
+
CompactionIterator* c_iter, const CompactionFileOpenFunc& open_file_func,
|
|
1440
|
+
const CompactionFileCloseFunc& close_file_func, uint64_t& prev_cpu_micros) {
|
|
1397
1441
|
Status status;
|
|
1442
|
+
const uint64_t kRecordStatsEvery = 1000;
|
|
1443
|
+
[[maybe_unused]] const std::optional<const Slice> end = sub_compact->end;
|
|
1444
|
+
|
|
1398
1445
|
TEST_SYNC_POINT_CALLBACK(
|
|
1399
1446
|
"CompactionJob::ProcessKeyValueCompaction()::Processing",
|
|
1400
1447
|
static_cast<void*>(const_cast<Compaction*>(sub_compact->compaction)));
|
|
1401
|
-
|
|
1402
|
-
while (status.ok() && !cfd->IsDropped() && c_iter->Valid()
|
|
1403
|
-
|
|
1404
|
-
// returns true.
|
|
1448
|
+
|
|
1449
|
+
while (status.ok() && !cfd->IsDropped() && c_iter->Valid() &&
|
|
1450
|
+
c_iter->status().ok()) {
|
|
1405
1451
|
assert(!end.has_value() ||
|
|
1406
1452
|
cfd->user_comparator()->Compare(c_iter->user_key(), *end) < 0);
|
|
1407
1453
|
|
|
1408
|
-
if (
|
|
1454
|
+
if (c_iter->iter_stats().num_input_records % kRecordStatsEvery ==
|
|
1409
1455
|
kRecordStatsEvery - 1) {
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
uint64_t cur_cpu_micros = db_options_.clock->CPUMicros();
|
|
1415
|
-
assert(cur_cpu_micros >= last_cpu_micros);
|
|
1416
|
-
RecordTick(stats_, COMPACTION_CPU_TOTAL_TIME,
|
|
1417
|
-
cur_cpu_micros - last_cpu_micros);
|
|
1418
|
-
last_cpu_micros = cur_cpu_micros;
|
|
1456
|
+
UpdateSubcompactionJobStatsIncrementally(
|
|
1457
|
+
c_iter, &sub_compact->compaction_job_stats,
|
|
1458
|
+
db_options_.clock->CPUMicros(), prev_cpu_micros);
|
|
1419
1459
|
}
|
|
1420
1460
|
|
|
1421
1461
|
const auto& ikey = c_iter->ikey();
|
|
1422
1462
|
bool use_proximal_output = ikey.sequence > proximal_after_seqno_;
|
|
1463
|
+
|
|
1423
1464
|
#ifndef NDEBUG
|
|
1424
1465
|
if (sub_compact->compaction->SupportsPerKeyPlacement()) {
|
|
1425
|
-
// Could be overridden by unittest
|
|
1426
1466
|
PerKeyPlacementContext context(sub_compact->compaction->output_level(),
|
|
1427
1467
|
ikey.user_key, c_iter->value(),
|
|
1428
1468
|
ikey.sequence, use_proximal_output);
|
|
@@ -1461,9 +1501,6 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
|
|
1461
1501
|
static_cast<void*>(const_cast<std::atomic<bool>*>(
|
|
1462
1502
|
&manual_compaction_canceled_)));
|
|
1463
1503
|
c_iter->Next();
|
|
1464
|
-
if (c_iter->status().IsManualCompactionPaused()) {
|
|
1465
|
-
break;
|
|
1466
|
-
}
|
|
1467
1504
|
|
|
1468
1505
|
#ifndef NDEBUG
|
|
1469
1506
|
bool stop = false;
|
|
@@ -1475,13 +1512,33 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
|
|
1475
1512
|
#endif // NDEBUG
|
|
1476
1513
|
}
|
|
1477
1514
|
|
|
1478
|
-
|
|
1479
|
-
|
|
1515
|
+
return status;
|
|
1516
|
+
}
|
|
1517
|
+
|
|
1518
|
+
void CompactionJob::UpdateSubcompactionJobStatsIncrementally(
|
|
1519
|
+
CompactionIterator* c_iter, CompactionJobStats* compaction_job_stats,
|
|
1520
|
+
uint64_t cur_cpu_micros, uint64_t& prev_cpu_micros) {
|
|
1521
|
+
RecordDroppedKeys(c_iter->iter_stats(), compaction_job_stats);
|
|
1522
|
+
c_iter->ResetRecordCounts();
|
|
1523
|
+
RecordCompactionIOStats();
|
|
1524
|
+
|
|
1525
|
+
assert(cur_cpu_micros >= prev_cpu_micros);
|
|
1526
|
+
RecordTick(stats_, COMPACTION_CPU_TOTAL_TIME,
|
|
1527
|
+
cur_cpu_micros - prev_cpu_micros);
|
|
1528
|
+
prev_cpu_micros = cur_cpu_micros;
|
|
1529
|
+
}
|
|
1530
|
+
|
|
1531
|
+
void CompactionJob::FinalizeSubcompactionJobStats(
|
|
1532
|
+
SubcompactionState* sub_compact, CompactionIterator* c_iter,
|
|
1533
|
+
uint64_t start_cpu_micros, uint64_t prev_cpu_micros,
|
|
1534
|
+
const CompactionIOStatsSnapshot& io_stats) {
|
|
1535
|
+
const CompactionIterationStats& c_iter_stats = c_iter->iter_stats();
|
|
1536
|
+
|
|
1480
1537
|
assert(!sub_compact->compaction->DoesInputReferenceBlobFiles() ||
|
|
1481
1538
|
c_iter->HasNumInputEntryScanned());
|
|
1482
|
-
sub_compact->compaction_job_stats.
|
|
1539
|
+
sub_compact->compaction_job_stats.has_accurate_num_input_records &=
|
|
1483
1540
|
c_iter->HasNumInputEntryScanned();
|
|
1484
|
-
sub_compact->compaction_job_stats.num_input_records
|
|
1541
|
+
sub_compact->compaction_job_stats.num_input_records +=
|
|
1485
1542
|
c_iter->NumInputEntryScanned();
|
|
1486
1543
|
sub_compact->compaction_job_stats.num_blobs_read =
|
|
1487
1544
|
c_iter_stats.num_blobs_read;
|
|
@@ -1512,84 +1569,188 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
|
|
1512
1569
|
c_iter_stats.total_blob_bytes_relocated);
|
|
1513
1570
|
}
|
|
1514
1571
|
|
|
1515
|
-
|
|
1516
|
-
|
|
1572
|
+
uint64_t cur_cpu_micros = db_options_.clock->CPUMicros();
|
|
1573
|
+
|
|
1574
|
+
// Record final compaction statistics including dropped keys, I/O stats,
|
|
1575
|
+
// and CPU time delta from the last periodic measurement
|
|
1576
|
+
UpdateSubcompactionJobStatsIncrementally(c_iter,
|
|
1577
|
+
&sub_compact->compaction_job_stats,
|
|
1578
|
+
cur_cpu_micros, prev_cpu_micros);
|
|
1579
|
+
|
|
1580
|
+
// Finalize timing and I/O statistics
|
|
1581
|
+
sub_compact->compaction_job_stats.cpu_micros =
|
|
1582
|
+
cur_cpu_micros - start_cpu_micros + sub_compact->GetWorkerCPUMicros();
|
|
1517
1583
|
|
|
1584
|
+
if (measure_io_stats_) {
|
|
1585
|
+
sub_compact->compaction_job_stats.file_write_nanos +=
|
|
1586
|
+
IOSTATS(write_nanos) - io_stats.prev_write_nanos;
|
|
1587
|
+
sub_compact->compaction_job_stats.file_fsync_nanos +=
|
|
1588
|
+
IOSTATS(fsync_nanos) - io_stats.prev_fsync_nanos;
|
|
1589
|
+
sub_compact->compaction_job_stats.file_range_sync_nanos +=
|
|
1590
|
+
IOSTATS(range_sync_nanos) - io_stats.prev_range_sync_nanos;
|
|
1591
|
+
sub_compact->compaction_job_stats.file_prepare_write_nanos +=
|
|
1592
|
+
IOSTATS(prepare_write_nanos) - io_stats.prev_prepare_write_nanos;
|
|
1593
|
+
sub_compact->compaction_job_stats.cpu_micros -=
|
|
1594
|
+
(IOSTATS(cpu_write_nanos) - io_stats.prev_cpu_write_nanos +
|
|
1595
|
+
IOSTATS(cpu_read_nanos) - io_stats.prev_cpu_read_nanos) /
|
|
1596
|
+
1000;
|
|
1597
|
+
if (io_stats.prev_perf_level !=
|
|
1598
|
+
PerfLevel::kEnableTimeAndCPUTimeExceptForMutex) {
|
|
1599
|
+
SetPerfLevel(io_stats.prev_perf_level);
|
|
1600
|
+
}
|
|
1601
|
+
}
|
|
1602
|
+
}
|
|
1603
|
+
|
|
1604
|
+
Status CompactionJob::FinalizeProcessKeyValueStatus(
|
|
1605
|
+
ColumnFamilyData* cfd, InternalIterator* input_iter,
|
|
1606
|
+
CompactionIterator* c_iter, Status status) {
|
|
1518
1607
|
if (status.ok() && cfd->IsDropped()) {
|
|
1519
1608
|
status =
|
|
1520
1609
|
Status::ColumnFamilyDropped("Column family dropped during compaction");
|
|
1521
1610
|
}
|
|
1522
|
-
if (
|
|
1523
|
-
shutting_down_->load(std::memory_order_relaxed)) {
|
|
1611
|
+
if (status.ok() && shutting_down_->load(std::memory_order_relaxed)) {
|
|
1524
1612
|
status = Status::ShutdownInProgress("Database shutdown");
|
|
1525
1613
|
}
|
|
1526
|
-
if (
|
|
1614
|
+
if (status.ok() &&
|
|
1527
1615
|
(manual_compaction_canceled_.load(std::memory_order_relaxed))) {
|
|
1528
1616
|
status = Status::Incomplete(Status::SubCode::kManualCompactionPaused);
|
|
1529
1617
|
}
|
|
1530
1618
|
if (status.ok()) {
|
|
1531
|
-
status =
|
|
1619
|
+
status = input_iter->status();
|
|
1532
1620
|
}
|
|
1533
1621
|
if (status.ok()) {
|
|
1534
1622
|
status = c_iter->status();
|
|
1535
1623
|
}
|
|
1536
1624
|
|
|
1625
|
+
return status;
|
|
1626
|
+
}
|
|
1627
|
+
|
|
1628
|
+
Status CompactionJob::CleanupCompactionFiles(
|
|
1629
|
+
SubcompactionState* sub_compact, Status status,
|
|
1630
|
+
const CompactionFileOpenFunc& open_file_func,
|
|
1631
|
+
const CompactionFileCloseFunc& close_file_func) {
|
|
1537
1632
|
// Call FinishCompactionOutputFile() even if status is not ok: it needs to
|
|
1538
1633
|
// close the output files. Open file function is also passed, in case there's
|
|
1539
1634
|
// only range-dels, no file was opened, to save the range-dels, it need to
|
|
1540
1635
|
// create a new output file.
|
|
1541
|
-
|
|
1542
|
-
|
|
1636
|
+
return sub_compact->CloseCompactionFiles(status, open_file_func,
|
|
1637
|
+
close_file_func);
|
|
1638
|
+
}
|
|
1543
1639
|
|
|
1640
|
+
Status CompactionJob::FinalizeBlobFiles(SubcompactionState* sub_compact,
|
|
1641
|
+
BlobFileBuilder* blob_file_builder,
|
|
1642
|
+
Status status) {
|
|
1544
1643
|
if (blob_file_builder) {
|
|
1545
1644
|
if (status.ok()) {
|
|
1546
1645
|
status = blob_file_builder->Finish();
|
|
1547
1646
|
} else {
|
|
1548
1647
|
blob_file_builder->Abandon(status);
|
|
1549
1648
|
}
|
|
1550
|
-
blob_file_builder.reset();
|
|
1551
1649
|
sub_compact->Current().UpdateBlobStats();
|
|
1552
1650
|
}
|
|
1553
1651
|
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
cur_cpu_micros - prev_cpu_micros;
|
|
1557
|
-
RecordTick(stats_, COMPACTION_CPU_TOTAL_TIME,
|
|
1558
|
-
cur_cpu_micros - last_cpu_micros);
|
|
1652
|
+
return status;
|
|
1653
|
+
}
|
|
1559
1654
|
|
|
1560
|
-
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
IOSTATS(range_sync_nanos) - prev_range_sync_nanos;
|
|
1567
|
-
sub_compact->compaction_job_stats.file_prepare_write_nanos +=
|
|
1568
|
-
IOSTATS(prepare_write_nanos) - prev_prepare_write_nanos;
|
|
1569
|
-
sub_compact->compaction_job_stats.cpu_micros -=
|
|
1570
|
-
(IOSTATS(cpu_write_nanos) - prev_cpu_write_nanos +
|
|
1571
|
-
IOSTATS(cpu_read_nanos) - prev_cpu_read_nanos) /
|
|
1572
|
-
1000;
|
|
1573
|
-
if (prev_perf_level != PerfLevel::kEnableTimeAndCPUTimeExceptForMutex) {
|
|
1574
|
-
SetPerfLevel(prev_perf_level);
|
|
1575
|
-
}
|
|
1655
|
+
void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
|
1656
|
+
assert(sub_compact);
|
|
1657
|
+
assert(sub_compact->compaction);
|
|
1658
|
+
|
|
1659
|
+
if (!ShouldUseLocalCompaction(sub_compact)) {
|
|
1660
|
+
return;
|
|
1576
1661
|
}
|
|
1662
|
+
|
|
1663
|
+
AutoThreadOperationStageUpdater stage_updater(
|
|
1664
|
+
ThreadStatus::STAGE_COMPACTION_PROCESS_KV);
|
|
1665
|
+
|
|
1666
|
+
const uint64_t start_cpu_micros = db_options_.clock->CPUMicros();
|
|
1667
|
+
uint64_t prev_cpu_micros = start_cpu_micros;
|
|
1668
|
+
const CompactionIOStatsSnapshot io_stats = InitializeIOStats();
|
|
1669
|
+
ColumnFamilyData* cfd = sub_compact->compaction->column_family_data();
|
|
1670
|
+
const CompactionFilter* compaction_filter;
|
|
1671
|
+
std::unique_ptr<CompactionFilter> compaction_filter_from_factory = nullptr;
|
|
1672
|
+
Status filter_status = SetupAndValidateCompactionFilter(
|
|
1673
|
+
sub_compact, cfd->ioptions().compaction_filter, compaction_filter,
|
|
1674
|
+
compaction_filter_from_factory);
|
|
1675
|
+
if (!filter_status.ok()) {
|
|
1676
|
+
sub_compact->status = filter_status;
|
|
1677
|
+
return;
|
|
1678
|
+
}
|
|
1679
|
+
|
|
1680
|
+
NotifyOnSubcompactionBegin(sub_compact);
|
|
1681
|
+
|
|
1682
|
+
SubcompactionKeyBoundaries boundaries(sub_compact->start, sub_compact->end);
|
|
1683
|
+
SubcompactionInternalIterators iterators;
|
|
1684
|
+
ReadOptions read_options;
|
|
1685
|
+
const WriteOptions write_options(Env::IOPriority::IO_LOW,
|
|
1686
|
+
Env::IOActivity::kCompaction);
|
|
1687
|
+
MergeHelper merge(
|
|
1688
|
+
env_, cfd->user_comparator(), cfd->ioptions().merge_operator.get(),
|
|
1689
|
+
compaction_filter, db_options_.info_log.get(),
|
|
1690
|
+
false /* internal key corruption is expected */,
|
|
1691
|
+
job_context_->GetLatestSnapshotSequence(), job_context_->snapshot_checker,
|
|
1692
|
+
compact_->compaction->level(), db_options_.stats);
|
|
1693
|
+
BlobFileResources blob_resources;
|
|
1694
|
+
|
|
1695
|
+
InternalIterator* input_iter = CreateInputIterator(
|
|
1696
|
+
sub_compact, cfd, iterators, boundaries, read_options);
|
|
1697
|
+
assert(input_iter);
|
|
1698
|
+
input_iter->SeekToFirst();
|
|
1699
|
+
|
|
1700
|
+
auto c_iter =
|
|
1701
|
+
CreateCompactionIterator(sub_compact, cfd, input_iter, compaction_filter,
|
|
1702
|
+
merge, blob_resources, write_options);
|
|
1703
|
+
assert(c_iter);
|
|
1704
|
+
c_iter->SeekToFirst();
|
|
1705
|
+
|
|
1706
|
+
TEST_SYNC_POINT("CompactionJob::Run():Inprogress");
|
|
1707
|
+
TEST_SYNC_POINT_CALLBACK("CompactionJob::Run():PausingManualCompaction:1",
|
|
1708
|
+
static_cast<void*>(const_cast<std::atomic<bool>*>(
|
|
1709
|
+
&manual_compaction_canceled_)));
|
|
1710
|
+
|
|
1711
|
+
auto [open_file_func, close_file_func] =
|
|
1712
|
+
CreateFileHandlers(sub_compact, boundaries);
|
|
1713
|
+
|
|
1714
|
+
Status status =
|
|
1715
|
+
ProcessKeyValue(sub_compact, cfd, c_iter.get(), open_file_func,
|
|
1716
|
+
close_file_func, prev_cpu_micros);
|
|
1717
|
+
|
|
1718
|
+
status = FinalizeProcessKeyValueStatus(cfd, input_iter, c_iter.get(), status);
|
|
1719
|
+
|
|
1720
|
+
FinalizeSubcompaction(sub_compact, status, open_file_func, close_file_func,
|
|
1721
|
+
blob_resources.blob_file_builder.get(), c_iter.get(),
|
|
1722
|
+
input_iter, start_cpu_micros, prev_cpu_micros,
|
|
1723
|
+
io_stats);
|
|
1724
|
+
|
|
1725
|
+
NotifyOnSubcompactionCompleted(sub_compact);
|
|
1726
|
+
}
|
|
1727
|
+
|
|
1728
|
+
void CompactionJob::FinalizeSubcompaction(
|
|
1729
|
+
SubcompactionState* sub_compact, Status status,
|
|
1730
|
+
const CompactionFileOpenFunc& open_file_func,
|
|
1731
|
+
const CompactionFileCloseFunc& close_file_func,
|
|
1732
|
+
BlobFileBuilder* blob_file_builder, CompactionIterator* c_iter,
|
|
1733
|
+
[[maybe_unused]] InternalIterator* input_iter, uint64_t start_cpu_micros,
|
|
1734
|
+
uint64_t prev_cpu_micros, const CompactionIOStatsSnapshot& io_stats) {
|
|
1735
|
+
status = CleanupCompactionFiles(sub_compact, status, open_file_func,
|
|
1736
|
+
close_file_func);
|
|
1737
|
+
status = FinalizeBlobFiles(sub_compact, blob_file_builder, status);
|
|
1738
|
+
|
|
1739
|
+
FinalizeSubcompactionJobStats(sub_compact, c_iter, start_cpu_micros,
|
|
1740
|
+
prev_cpu_micros, io_stats);
|
|
1741
|
+
|
|
1577
1742
|
#ifdef ROCKSDB_ASSERT_STATUS_CHECKED
|
|
1578
1743
|
if (!status.ok()) {
|
|
1579
1744
|
if (c_iter) {
|
|
1580
1745
|
c_iter->status().PermitUncheckedError();
|
|
1581
1746
|
}
|
|
1582
|
-
if (
|
|
1583
|
-
|
|
1747
|
+
if (input_iter) {
|
|
1748
|
+
input_iter->status().PermitUncheckedError();
|
|
1584
1749
|
}
|
|
1585
1750
|
}
|
|
1586
1751
|
#endif // ROCKSDB_ASSERT_STATUS_CHECKED
|
|
1587
1752
|
|
|
1588
|
-
blob_counter.reset();
|
|
1589
|
-
clip.reset();
|
|
1590
|
-
raw_input.reset();
|
|
1591
1753
|
sub_compact->status = status;
|
|
1592
|
-
NotifyOnSubcompactionCompleted(sub_compact);
|
|
1593
1754
|
}
|
|
1594
1755
|
|
|
1595
1756
|
uint64_t CompactionJob::GetCompactionId(SubcompactionState* sub_compact) const {
|
|
@@ -2106,7 +2267,7 @@ void CopyPrefix(const Slice& src, size_t prefix_length, std::string* dst) {
|
|
|
2106
2267
|
}
|
|
2107
2268
|
} // namespace
|
|
2108
2269
|
|
|
2109
|
-
bool CompactionJob::
|
|
2270
|
+
bool CompactionJob::UpdateInternalStatsFromInputFiles(
|
|
2110
2271
|
uint64_t* num_input_range_del) {
|
|
2111
2272
|
assert(compact_);
|
|
2112
2273
|
|
|
@@ -2189,7 +2350,7 @@ bool CompactionJob::BuildStatsFromInputTableProperties(
|
|
|
2189
2350
|
return !has_error;
|
|
2190
2351
|
}
|
|
2191
2352
|
|
|
2192
|
-
void CompactionJob::
|
|
2353
|
+
void CompactionJob::UpdateCompactionJobInputStatsFromInternalStats(
|
|
2193
2354
|
const InternalStats::CompactionStatsFull& internal_stats,
|
|
2194
2355
|
uint64_t num_input_range_del) const {
|
|
2195
2356
|
assert(job_stats_);
|
|
@@ -2242,7 +2403,7 @@ void CompactionJob::UpdateCompactionJobInputStats(
|
|
|
2242
2403
|
}
|
|
2243
2404
|
}
|
|
2244
2405
|
|
|
2245
|
-
void CompactionJob::
|
|
2406
|
+
void CompactionJob::UpdateCompactionJobOutputStatsFromInternalStats(
|
|
2246
2407
|
const InternalStats::CompactionStatsFull& internal_stats) const {
|
|
2247
2408
|
assert(job_stats_);
|
|
2248
2409
|
job_stats_->elapsed_micros = internal_stats.output_level_stats.micros;
|
|
@@ -2378,6 +2539,11 @@ Status CompactionJob::VerifyInputRecordCount(
|
|
|
2378
2539
|
"number of keys processed. Expected " +
|
|
2379
2540
|
std::to_string(expected) + " but processed " +
|
|
2380
2541
|
std::to_string(actual) + ". Compaction summary: " + scratch;
|
|
2542
|
+
ROCKS_LOG_WARN(
|
|
2543
|
+
db_options_.info_log,
|
|
2544
|
+
"[%s] [JOB %d] VerifyInputRecordCount() Status: %s",
|
|
2545
|
+
compact_->compaction->column_family_data()->GetName().c_str(),
|
|
2546
|
+
job_context_->job_id, msg.c_str());
|
|
2381
2547
|
if (db_options_.compaction_verify_record_count) {
|
|
2382
2548
|
return Status::Corruption(msg);
|
|
2383
2549
|
}
|
|
@@ -2386,4 +2552,38 @@ Status CompactionJob::VerifyInputRecordCount(
|
|
|
2386
2552
|
return Status::OK();
|
|
2387
2553
|
}
|
|
2388
2554
|
|
|
2555
|
+
Status CompactionJob::VerifyOutputRecordCount() const {
|
|
2556
|
+
uint64_t total_output_num = 0;
|
|
2557
|
+
for (const auto& state : compact_->sub_compact_states) {
|
|
2558
|
+
for (const auto& output : state.GetOutputs()) {
|
|
2559
|
+
total_output_num += output.table_properties->num_entries -
|
|
2560
|
+
output.table_properties->num_range_deletions;
|
|
2561
|
+
}
|
|
2562
|
+
}
|
|
2563
|
+
|
|
2564
|
+
uint64_t expected = internal_stats_.output_level_stats.num_output_records;
|
|
2565
|
+
if (internal_stats_.has_proximal_level_output) {
|
|
2566
|
+
expected += internal_stats_.proximal_level_stats.num_output_records;
|
|
2567
|
+
}
|
|
2568
|
+
if (expected != total_output_num) {
|
|
2569
|
+
char scratch[2345];
|
|
2570
|
+
compact_->compaction->Summary(scratch, sizeof(scratch));
|
|
2571
|
+
std::string msg =
|
|
2572
|
+
"Number of keys in compaction output SST files does not match "
|
|
2573
|
+
"number of keys added. Expected " +
|
|
2574
|
+
std::to_string(expected) + " but there are " +
|
|
2575
|
+
std::to_string(total_output_num) +
|
|
2576
|
+
" in output SST files. Compaction summary: " + scratch;
|
|
2577
|
+
ROCKS_LOG_WARN(
|
|
2578
|
+
db_options_.info_log,
|
|
2579
|
+
"[%s] [JOB %d] VerifyOutputRecordCount() status: %s",
|
|
2580
|
+
compact_->compaction->column_family_data()->GetName().c_str(),
|
|
2581
|
+
job_context_->job_id, msg.c_str());
|
|
2582
|
+
if (db_options_.compaction_verify_record_count) {
|
|
2583
|
+
return Status::Corruption(msg);
|
|
2584
|
+
}
|
|
2585
|
+
}
|
|
2586
|
+
return Status::OK();
|
|
2587
|
+
}
|
|
2588
|
+
|
|
2389
2589
|
} // namespace ROCKSDB_NAMESPACE
|