@nxtedition/rocksdb 13.1.5 → 13.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +62 -15
- package/deps/rocksdb/rocksdb/{TARGETS → BUCK} +27 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +3 -1
- package/deps/rocksdb/rocksdb/Makefile +2 -2
- package/deps/rocksdb/rocksdb/cache/cache.cc +3 -1
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +2 -0
- package/deps/rocksdb/rocksdb/db/attribute_group_iterator_impl.h +34 -9
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +7 -6
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +22 -14
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +149 -0
- package/deps/rocksdb/rocksdb/db/builder.cc +13 -24
- package/deps/rocksdb/rocksdb/db/coalescing_iterator.h +35 -10
- package/deps/rocksdb/rocksdb/db/column_family.cc +21 -10
- package/deps/rocksdb/rocksdb/db/column_family.h +15 -8
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +98 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +126 -16
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +51 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +24 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +52 -22
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +9 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +36 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +6 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +30 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +26 -23
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +43 -33
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +6 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +19 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +6 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +632 -411
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +171 -51
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +7 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +37 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +51 -11
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +10 -3
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +350 -154
- package/deps/rocksdb/rocksdb/db/convenience.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +62 -27
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +68 -1
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +91 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +134 -70
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +71 -23
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +43 -16
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +47 -33
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +27 -19
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +38 -25
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +7 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +258 -42
- package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +161 -9
- package/deps/rocksdb/rocksdb/db/db_iter.cc +118 -86
- package/deps/rocksdb/rocksdb/db/db_iter.h +44 -17
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +27 -6
- package/deps/rocksdb/rocksdb/db/db_test.cc +48 -16
- package/deps/rocksdb/rocksdb/db/db_test2.cc +60 -15
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +97 -44
- package/deps/rocksdb/rocksdb/db/db_test_util.h +7 -1
- package/deps/rocksdb/rocksdb/db/dbformat.cc +15 -5
- package/deps/rocksdb/rocksdb/db/dbformat.h +137 -55
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +54 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +663 -8
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +152 -91
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +134 -11
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +55 -9
- package/deps/rocksdb/rocksdb/db/flush_job.cc +52 -29
- package/deps/rocksdb/rocksdb/db/flush_job.h +5 -3
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +18 -12
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +23 -29
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +3 -2
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +9 -6
- package/deps/rocksdb/rocksdb/db/internal_stats.h +54 -0
- package/deps/rocksdb/rocksdb/db/job_context.h +1 -1
- package/deps/rocksdb/rocksdb/db/log_reader.cc +6 -7
- package/deps/rocksdb/rocksdb/db/manifest_ops.cc +47 -0
- package/deps/rocksdb/rocksdb/db/manifest_ops.h +20 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +165 -64
- package/deps/rocksdb/rocksdb/db/memtable.h +422 -243
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +99 -68
- package/deps/rocksdb/rocksdb/db/memtable_list.h +63 -38
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +28 -25
- package/deps/rocksdb/rocksdb/db/multi_cf_iterator_impl.h +118 -60
- package/deps/rocksdb/rocksdb/db/multi_cf_iterator_test.cc +344 -89
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +2 -3
- package/deps/rocksdb/rocksdb/db/repair.cc +15 -14
- package/deps/rocksdb/rocksdb/db/repair_test.cc +0 -13
- package/deps/rocksdb/rocksdb/db/snapshot_checker.h +7 -0
- package/deps/rocksdb/rocksdb/db/table_cache.cc +62 -65
- package/deps/rocksdb/rocksdb/db/table_cache.h +70 -76
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +5 -6
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +8 -7
- package/deps/rocksdb/rocksdb/db/version_builder.cc +17 -19
- package/deps/rocksdb/rocksdb/db/version_builder.h +13 -12
- package/deps/rocksdb/rocksdb/db/version_edit.h +30 -0
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +3 -5
- package/deps/rocksdb/rocksdb/db/version_set.cc +89 -129
- package/deps/rocksdb/rocksdb/db/version_set.h +12 -4
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -2
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +12 -8
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +0 -15
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +0 -2
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +9 -7
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +0 -8
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.h +28 -2
- package/deps/rocksdb/rocksdb/db/write_batch.cc +32 -10
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +9 -0
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/write_thread.cc +3 -1
- package/deps/rocksdb/rocksdb/db/write_thread.h +6 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +15 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +7 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +18 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +100 -22
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +34 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +223 -78
- package/deps/rocksdb/rocksdb/env/file_system.cc +6 -1
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +53 -0
- package/deps/rocksdb/rocksdb/env/io_posix.cc +63 -17
- package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +132 -48
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +92 -24
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +727 -109
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +3 -4
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/attribute_groups.h +20 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +9 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +10 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +34 -37
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator_base.h +21 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +56 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +36 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +11 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +84 -60
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/secondary_index.h +102 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +89 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +32 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +30 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +23 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -0
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +79 -21
- package/deps/rocksdb/rocksdb/memtable/skiplist.h +41 -18
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +1 -5
- package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.cc +169 -0
- package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.h +400 -0
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +137 -82
- package/deps/rocksdb/rocksdb/options/cf_options.h +18 -6
- package/deps/rocksdb/rocksdb/options/configurable.cc +31 -17
- package/deps/rocksdb/rocksdb/options/configurable_helper.h +7 -6
- package/deps/rocksdb/rocksdb/options/options_helper.cc +10 -8
- package/deps/rocksdb/rocksdb/options/options_parser.cc +74 -54
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +89 -0
- package/deps/rocksdb/rocksdb/options/options_test.cc +112 -26
- package/deps/rocksdb/rocksdb/port/port.h +5 -9
- package/deps/rocksdb/rocksdb/src.mk +8 -0
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +4 -0
- package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +62 -80
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +13 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +16 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +38 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +12 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +204 -1
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +4 -0
- package/deps/rocksdb/rocksdb/table/format.cc +3 -3
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +4 -1
- package/deps/rocksdb/rocksdb/table/mock_table.cc +0 -50
- package/deps/rocksdb/rocksdb/table/mock_table.h +53 -0
- package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +4 -0
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +10 -5
- package/deps/rocksdb/rocksdb/table/table_builder.h +3 -1
- package/deps/rocksdb/rocksdb/table/table_properties.cc +181 -0
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +5 -5
- package/deps/rocksdb/rocksdb/table/table_test.cc +71 -64
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +45 -45
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +35 -35
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +43 -43
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +41 -4
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +1 -1
- package/deps/rocksdb/rocksdb/unreleased_history/add.sh +13 -0
- package/deps/rocksdb/rocksdb/util/aligned_buffer.h +24 -5
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +7 -0
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +0 -52
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +1 -10
- package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +92 -0
- package/deps/rocksdb/rocksdb/util/thread_operation.h +1 -0
- package/deps/rocksdb/rocksdb/util/udt_util.cc +50 -4
- package/deps/rocksdb/rocksdb/util/udt_util.h +24 -11
- package/deps/rocksdb/rocksdb/util/udt_util_test.cc +26 -13
- package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +1 -16
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.cc +214 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.h +60 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index_test.cc +124 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/secondary_index_mixin.h +441 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +34 -3
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +7 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +437 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +34 -11
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +14 -7
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +7 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/snapshot_checker.cc +17 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +69 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +20 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +1290 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +324 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +8 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +57 -12
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +32 -3
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +33 -2
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +721 -9
- package/deps/rocksdb/rocksdb.gyp +2 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
|
@@ -8,6 +8,8 @@
|
|
|
8
8
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
9
9
|
|
|
10
10
|
#pragma once
|
|
11
|
+
|
|
12
|
+
#include "db/snapshot_checker.h"
|
|
11
13
|
#include "db/version_set.h"
|
|
12
14
|
#include "memory/arena.h"
|
|
13
15
|
#include "options/cf_options.h"
|
|
@@ -90,6 +92,8 @@ class Compaction {
|
|
|
90
92
|
CompressionOptions compression_opts,
|
|
91
93
|
Temperature output_temperature, uint32_t max_subcompactions,
|
|
92
94
|
std::vector<FileMetaData*> grandparents,
|
|
95
|
+
std::optional<SequenceNumber> earliest_snapshot,
|
|
96
|
+
const SnapshotChecker* snapshot_checker,
|
|
93
97
|
bool manual_compaction = false, const std::string& trim_ts = "",
|
|
94
98
|
double score = -1, bool deletion_compaction = false,
|
|
95
99
|
bool l0_files_might_overlap = true,
|
|
@@ -180,6 +184,16 @@ class Compaction {
|
|
|
180
184
|
return &input_levels_[compaction_input_level];
|
|
181
185
|
}
|
|
182
186
|
|
|
187
|
+
// Returns the filtered input files of the specified compaction input level.
|
|
188
|
+
// For now, only non start level is filtered.
|
|
189
|
+
const std::vector<FileMetaData*>& filtered_input_levels(
|
|
190
|
+
size_t compaction_input_level) const {
|
|
191
|
+
const std::vector<FileMetaData*>& filtered_input_level =
|
|
192
|
+
filtered_input_levels_[compaction_input_level];
|
|
193
|
+
assert(compaction_input_level != 0 || filtered_input_level.size() == 0);
|
|
194
|
+
return filtered_input_level;
|
|
195
|
+
}
|
|
196
|
+
|
|
183
197
|
// Maximum size of files to build during this compaction.
|
|
184
198
|
uint64_t max_output_file_size() const { return max_output_file_size_; }
|
|
185
199
|
|
|
@@ -401,6 +415,12 @@ class Compaction {
|
|
|
401
415
|
return blob_garbage_collection_age_cutoff_;
|
|
402
416
|
}
|
|
403
417
|
|
|
418
|
+
// start and end are sub compact range. Null if no boundary.
|
|
419
|
+
// This is used to calculate the newest_key_time table property after
|
|
420
|
+
// compaction.
|
|
421
|
+
uint64_t MaxInputFileNewestKeyTime(const InternalKey* start,
|
|
422
|
+
const InternalKey* end) const;
|
|
423
|
+
|
|
404
424
|
// start and end are sub compact range. Null if no boundary.
|
|
405
425
|
// This is used to filter out some input files' ancester's time range.
|
|
406
426
|
uint64_t MinInputFileOldestAncesterTime(const InternalKey* start,
|
|
@@ -430,10 +450,11 @@ class Compaction {
|
|
|
430
450
|
// penultimate level. The safe key range is populated by
|
|
431
451
|
// `PopulatePenultimateLevelOutputRange()`.
|
|
432
452
|
// Which could potentially disable all penultimate level output.
|
|
433
|
-
static int EvaluatePenultimateLevel(
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
453
|
+
static int EvaluatePenultimateLevel(
|
|
454
|
+
const VersionStorageInfo* vstorage,
|
|
455
|
+
const MutableCFOptions& mutable_cf_options,
|
|
456
|
+
const ImmutableOptions& immutable_options, const int start_level,
|
|
457
|
+
const int output_level);
|
|
437
458
|
|
|
438
459
|
// mark (or clear) all files that are being compacted
|
|
439
460
|
void MarkFilesBeingCompacted(bool being_compacted) const;
|
|
@@ -460,6 +481,13 @@ class Compaction {
|
|
|
460
481
|
// `Compaction::WithinPenultimateLevelOutputRange()`.
|
|
461
482
|
void PopulatePenultimateLevelOutputRange();
|
|
462
483
|
|
|
484
|
+
// If oldest snapshot is specified at Compaction construction time, we have
|
|
485
|
+
// an opportunity to optimize inputs for compaction iterator for this case:
|
|
486
|
+
// When a standalone range deletion file on the start level is recognized and
|
|
487
|
+
// can be determined to completely shadow some input files on non-start level.
|
|
488
|
+
// These files will be filtered out and later not feed to compaction iterator.
|
|
489
|
+
void FilterInputsForCompactionIterator();
|
|
490
|
+
|
|
463
491
|
// Get the atomic file boundaries for all files in the compaction. Necessary
|
|
464
492
|
// in order to avoid the scenario described in
|
|
465
493
|
// https://github.com/facebook/rocksdb/pull/4432#discussion_r221072219 and
|
|
@@ -510,12 +538,30 @@ class Compaction {
|
|
|
510
538
|
// Compaction input files organized by level. Constant after construction
|
|
511
539
|
const std::vector<CompactionInputFiles> inputs_;
|
|
512
540
|
|
|
513
|
-
//
|
|
541
|
+
// All files from inputs_ that are not filtered and will be fed to compaction
|
|
542
|
+
// iterator, organized more closely in memory.
|
|
514
543
|
autovector<LevelFilesBrief, 2> input_levels_;
|
|
515
544
|
|
|
516
545
|
// State used to check for number of overlapping grandparent files
|
|
517
546
|
// (grandparent == "output_level_ + 1")
|
|
518
547
|
std::vector<FileMetaData*> grandparents_;
|
|
548
|
+
|
|
549
|
+
// The earliest snapshot and snapshot checker at compaction picking time.
|
|
550
|
+
// These fields are only set for deletion triggered compactions picked in
|
|
551
|
+
// universal compaction. And when user-defined timestamp is not enabled.
|
|
552
|
+
// It will be used to possibly filter out some non start level input files.
|
|
553
|
+
std::optional<SequenceNumber> earliest_snapshot_;
|
|
554
|
+
const SnapshotChecker* snapshot_checker_;
|
|
555
|
+
|
|
556
|
+
// Markers for which non start level input files are filtered out if
|
|
557
|
+
// applicable. Only applicable if earliest_snapshot_ is provided and input
|
|
558
|
+
// start level has a standalone range deletion file. Filtered files are
|
|
559
|
+
// tracked in `filtered_input_levels_`.
|
|
560
|
+
std::vector<std::vector<bool>> non_start_level_input_files_filtered_;
|
|
561
|
+
|
|
562
|
+
// All files from inputs_ that are filtered.
|
|
563
|
+
std::vector<std::vector<FileMetaData*>> filtered_input_levels_;
|
|
564
|
+
|
|
519
565
|
const double score_; // score that was used to pick this compaction.
|
|
520
566
|
|
|
521
567
|
// Is this compaction creating a file in the bottom most level?
|
|
@@ -872,8 +872,8 @@ void CompactionIterator::NextFromInput() {
|
|
|
872
872
|
if (Valid()) {
|
|
873
873
|
at_next_ = true;
|
|
874
874
|
}
|
|
875
|
-
} else if (
|
|
876
|
-
(last_snapshot
|
|
875
|
+
} else if (last_sequence != kMaxSequenceNumber &&
|
|
876
|
+
(last_snapshot == current_user_key_snapshot_ ||
|
|
877
877
|
last_snapshot < current_user_key_snapshot_)) {
|
|
878
878
|
// If the earliest snapshot is which this key is visible in
|
|
879
879
|
// is the same as the visibility of a previous instance of the
|
|
@@ -540,18 +540,12 @@ class CompactionIterator {
|
|
|
540
540
|
|
|
541
541
|
inline bool CompactionIterator::DefinitelyInSnapshot(SequenceNumber seq,
|
|
542
542
|
SequenceNumber snapshot) {
|
|
543
|
-
return (
|
|
544
|
-
(snapshot_checker_ == nullptr ||
|
|
545
|
-
LIKELY(snapshot_checker_->CheckInSnapshot((seq), (snapshot)) ==
|
|
546
|
-
SnapshotCheckerResult::kInSnapshot)));
|
|
543
|
+
return DataIsDefinitelyInSnapshot(seq, snapshot, snapshot_checker_);
|
|
547
544
|
}
|
|
548
545
|
|
|
549
546
|
inline bool CompactionIterator::DefinitelyNotInSnapshot(
|
|
550
547
|
SequenceNumber seq, SequenceNumber snapshot) {
|
|
551
|
-
return (
|
|
552
|
-
(snapshot_checker_ != nullptr &&
|
|
553
|
-
UNLIKELY(snapshot_checker_->CheckInSnapshot((seq), (snapshot)) ==
|
|
554
|
-
SnapshotCheckerResult::kNotInSnapshot)));
|
|
548
|
+
return DataIsDefinitelyNotInSnapshot(seq, snapshot, snapshot_checker_);
|
|
555
549
|
}
|
|
556
550
|
|
|
557
551
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -833,6 +833,14 @@ TEST_P(CompactionIteratorTest, ConvertToPutAtBottom) {
|
|
|
833
833
|
true /*bottomost_level*/);
|
|
834
834
|
}
|
|
835
835
|
|
|
836
|
+
TEST_P(CompactionIteratorTest, ZeroSeqOfKeyAndSnapshot) {
|
|
837
|
+
AddSnapshot(0);
|
|
838
|
+
const std::vector<std::string> input_keys = {
|
|
839
|
+
test::KeyStr("a", 0, kTypeValue), test::KeyStr("b", 0, kTypeValue)};
|
|
840
|
+
const std::vector<std::string> input_values = {"a1", "b1"};
|
|
841
|
+
RunTest(input_keys, input_values, input_keys, input_values);
|
|
842
|
+
}
|
|
843
|
+
|
|
836
844
|
INSTANTIATE_TEST_CASE_P(CompactionIteratorTestInstance, CompactionIteratorTest,
|
|
837
845
|
testing::Values(true, false));
|
|
838
846
|
|
|
@@ -1846,6 +1854,22 @@ TEST_P(CompactionIteratorTsGcTest, SingleDeleteAllKeysOlderThanThreshold) {
|
|
|
1846
1854
|
}
|
|
1847
1855
|
}
|
|
1848
1856
|
|
|
1857
|
+
TEST_P(CompactionIteratorTsGcTest, ZeroSeqOfKeyAndSnapshot) {
|
|
1858
|
+
AddSnapshot(0);
|
|
1859
|
+
std::string full_history_ts_low;
|
|
1860
|
+
PutFixed64(&full_history_ts_low, std::numeric_limits<uint64_t>::max());
|
|
1861
|
+
const std::vector<std::string> input_keys = {
|
|
1862
|
+
test::KeyStr(101, "a", 0, kTypeValue),
|
|
1863
|
+
test::KeyStr(102, "b", 0, kTypeValue)};
|
|
1864
|
+
const std::vector<std::string> input_values = {"a1", "b1"};
|
|
1865
|
+
RunTest(input_keys, input_values, input_keys, input_values,
|
|
1866
|
+
/*last_committed_seq=*/kMaxSequenceNumber,
|
|
1867
|
+
/*merge_operator=*/nullptr, /*compaction_filter=*/nullptr,
|
|
1868
|
+
/*bottommost_level=*/false,
|
|
1869
|
+
/*earliest_write_conflict_snapshot=*/kMaxSequenceNumber,
|
|
1870
|
+
/*key_not_exists_beyond_output_level=*/false, &full_history_ts_low);
|
|
1871
|
+
}
|
|
1872
|
+
|
|
1849
1873
|
INSTANTIATE_TEST_CASE_P(CompactionIteratorTsGcTestInstance,
|
|
1850
1874
|
CompactionIteratorTsGcTest,
|
|
1851
1875
|
testing::Values(true, false));
|
|
@@ -288,8 +288,8 @@ void CompactionJob::Prepare() {
|
|
|
288
288
|
// to encode seqno->time to the output files.
|
|
289
289
|
|
|
290
290
|
uint64_t preserve_time_duration =
|
|
291
|
-
std::max(c->
|
|
292
|
-
c->
|
|
291
|
+
std::max(c->mutable_cf_options()->preserve_internal_time_seconds,
|
|
292
|
+
c->mutable_cf_options()->preclude_last_level_data_seconds);
|
|
293
293
|
|
|
294
294
|
if (preserve_time_duration > 0) {
|
|
295
295
|
const ReadOptions read_options(Env::IOActivity::kCompaction);
|
|
@@ -326,8 +326,8 @@ void CompactionJob::Prepare() {
|
|
|
326
326
|
seqno_to_time_mapping_.Enforce(_current_time);
|
|
327
327
|
seqno_to_time_mapping_.GetCurrentTieringCutoffSeqnos(
|
|
328
328
|
static_cast<uint64_t>(_current_time),
|
|
329
|
-
c->
|
|
330
|
-
c->
|
|
329
|
+
c->mutable_cf_options()->preserve_internal_time_seconds,
|
|
330
|
+
c->mutable_cf_options()->preclude_last_level_data_seconds,
|
|
331
331
|
&preserve_time_min_seqno_, &preclude_last_level_min_seqno_);
|
|
332
332
|
}
|
|
333
333
|
// For accuracy of the GetProximalSeqnoBeforeTime queries above, we only
|
|
@@ -469,7 +469,7 @@ void CompactionJob::GenSubcompactionBoundaries() {
|
|
|
469
469
|
ReadOptions read_options(Env::IOActivity::kCompaction);
|
|
470
470
|
read_options.rate_limiter_priority = GetRateLimiterPriority();
|
|
471
471
|
auto* c = compact_->compaction;
|
|
472
|
-
if (c->
|
|
472
|
+
if (c->mutable_cf_options()->table_factory->Name() ==
|
|
473
473
|
TableFactory::kPlainTableName()) {
|
|
474
474
|
return;
|
|
475
475
|
}
|
|
@@ -506,9 +506,7 @@ void CompactionJob::GenSubcompactionBoundaries() {
|
|
|
506
506
|
FileMetaData* f = flevel->files[i].file_metadata;
|
|
507
507
|
std::vector<TableReader::Anchor> my_anchors;
|
|
508
508
|
Status s = cfd->table_cache()->ApproximateKeyAnchors(
|
|
509
|
-
read_options, icomp, *f,
|
|
510
|
-
c->mutable_cf_options()->block_protection_bytes_per_key,
|
|
511
|
-
my_anchors);
|
|
509
|
+
read_options, icomp, *f, *c->mutable_cf_options(), my_anchors);
|
|
512
510
|
if (!s.ok() || my_anchors.empty()) {
|
|
513
511
|
my_anchors.emplace_back(f->largest.user_key(), f->fd.GetFileSize());
|
|
514
512
|
}
|
|
@@ -711,8 +709,6 @@ Status CompactionJob::Run() {
|
|
|
711
709
|
}
|
|
712
710
|
}
|
|
713
711
|
ColumnFamilyData* cfd = compact_->compaction->column_family_data();
|
|
714
|
-
auto& prefix_extractor =
|
|
715
|
-
compact_->compaction->mutable_cf_options()->prefix_extractor;
|
|
716
712
|
std::atomic<size_t> next_file_idx(0);
|
|
717
713
|
auto verify_table = [&](Status& output_status) {
|
|
718
714
|
while (true) {
|
|
@@ -733,7 +729,8 @@ Status CompactionJob::Run() {
|
|
|
733
729
|
InternalIterator* iter = cfd->table_cache()->NewIterator(
|
|
734
730
|
verify_table_read_options, file_options_,
|
|
735
731
|
cfd->internal_comparator(), files_output[file_idx]->meta,
|
|
736
|
-
/*range_del_agg=*/nullptr,
|
|
732
|
+
/*range_del_agg=*/nullptr,
|
|
733
|
+
*compact_->compaction->mutable_cf_options(),
|
|
737
734
|
/*table_reader_ptr=*/nullptr,
|
|
738
735
|
cfd->internal_stats()->GetFileReadHist(
|
|
739
736
|
compact_->compaction->output_level()),
|
|
@@ -743,9 +740,7 @@ Status CompactionJob::Run() {
|
|
|
743
740
|
*compact_->compaction->mutable_cf_options()),
|
|
744
741
|
/*smallest_compaction_key=*/nullptr,
|
|
745
742
|
/*largest_compaction_key=*/nullptr,
|
|
746
|
-
/*allow_unprepared_value=*/false
|
|
747
|
-
compact_->compaction->mutable_cf_options()
|
|
748
|
-
->block_protection_bytes_per_key);
|
|
743
|
+
/*allow_unprepared_value=*/false);
|
|
749
744
|
auto s = iter->status();
|
|
750
745
|
|
|
751
746
|
if (s.ok() && paranoid_file_checks_) {
|
|
@@ -916,19 +911,23 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options,
|
|
|
916
911
|
ROCKS_LOG_BUFFER(
|
|
917
912
|
log_buffer_,
|
|
918
913
|
"[%s] compacted to: %s, MB/sec: %.1f rd, %.1f wr, level %d, "
|
|
919
|
-
"files in(%d, %d) out(%d +%d blob) "
|
|
920
|
-
"MB in(%.1f, %.1f +%.1f blob) out(%.1f +%.1f blob), "
|
|
914
|
+
"files in(%d, %d) filtered(%d, %d) out(%d +%d blob) "
|
|
915
|
+
"MB in(%.1f, %.1f +%.1f blob) filtered(%.1f, %.1f) out(%.1f +%.1f blob), "
|
|
921
916
|
"read-write-amplify(%.1f) write-amplify(%.1f) %s, records in: %" PRIu64
|
|
922
917
|
", records dropped: %" PRIu64 " output_compression: %s\n",
|
|
923
918
|
column_family_name.c_str(), vstorage->LevelSummary(&tmp),
|
|
924
919
|
bytes_read_per_sec, bytes_written_per_sec,
|
|
925
920
|
compact_->compaction->output_level(),
|
|
926
921
|
stats.num_input_files_in_non_output_levels,
|
|
927
|
-
stats.num_input_files_in_output_level,
|
|
922
|
+
stats.num_input_files_in_output_level,
|
|
923
|
+
stats.num_filtered_input_files_in_non_output_levels,
|
|
924
|
+
stats.num_filtered_input_files_in_output_level, stats.num_output_files,
|
|
928
925
|
stats.num_output_files_blob, stats.bytes_read_non_output_levels / kMB,
|
|
929
926
|
stats.bytes_read_output_level / kMB, stats.bytes_read_blob / kMB,
|
|
930
|
-
stats.
|
|
931
|
-
|
|
927
|
+
stats.bytes_skipped_non_output_levels / kMB,
|
|
928
|
+
stats.bytes_skipped_output_level / kMB, stats.bytes_written / kMB,
|
|
929
|
+
stats.bytes_written_blob / kMB, read_write_amp, write_amp,
|
|
930
|
+
status.ToString().c_str(), stats.num_input_records,
|
|
932
931
|
stats.num_dropped_records,
|
|
933
932
|
CompressionTypeToString(compact_->compaction->output_compression())
|
|
934
933
|
.c_str());
|
|
@@ -1587,6 +1586,8 @@ Status CompactionJob::FinishCompactionOutputFile(
|
|
|
1587
1586
|
const uint64_t current_entries = outputs.NumEntries();
|
|
1588
1587
|
|
|
1589
1588
|
s = outputs.Finish(s, seqno_to_time_mapping_);
|
|
1589
|
+
TEST_SYNC_POINT_CALLBACK(
|
|
1590
|
+
"CompactionJob::FinishCompactionOutputFile()::AfterFinish", &s);
|
|
1590
1591
|
|
|
1591
1592
|
if (s.ok()) {
|
|
1592
1593
|
// With accurate smallest and largest key, we can get a slightly more
|
|
@@ -1919,6 +1920,10 @@ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact,
|
|
|
1919
1920
|
oldest_ancester_time = current_time;
|
|
1920
1921
|
}
|
|
1921
1922
|
|
|
1923
|
+
uint64_t newest_key_time = sub_compact->compaction->MaxInputFileNewestKeyTime(
|
|
1924
|
+
sub_compact->start.has_value() ? &tmp_start : nullptr,
|
|
1925
|
+
sub_compact->end.has_value() ? &tmp_end : nullptr);
|
|
1926
|
+
|
|
1922
1927
|
// Initialize a SubcompactionState::Output and add it to sub_compact->outputs
|
|
1923
1928
|
uint64_t epoch_number = sub_compact->compaction->MinInputFileEpochNumber();
|
|
1924
1929
|
{
|
|
@@ -1968,7 +1973,7 @@ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact,
|
|
|
1968
1973
|
cfd->internal_tbl_prop_coll_factories(),
|
|
1969
1974
|
sub_compact->compaction->output_compression(),
|
|
1970
1975
|
sub_compact->compaction->output_compression_opts(), cfd->GetID(),
|
|
1971
|
-
cfd->GetName(), sub_compact->compaction->output_level(),
|
|
1976
|
+
cfd->GetName(), sub_compact->compaction->output_level(), newest_key_time,
|
|
1972
1977
|
bottommost_level_, TableFileCreationReason::kCompaction,
|
|
1973
1978
|
0 /* oldest_key_time */, current_time, db_id_, db_session_id_,
|
|
1974
1979
|
sub_compact->compaction->max_output_file_size(), file_number,
|
|
@@ -2011,7 +2016,8 @@ bool CompactionJob::UpdateCompactionStats(uint64_t* num_input_range_del) {
|
|
|
2011
2016
|
for (int input_level = 0;
|
|
2012
2017
|
input_level < static_cast<int>(compaction->num_input_levels());
|
|
2013
2018
|
++input_level) {
|
|
2014
|
-
|
|
2019
|
+
const LevelFilesBrief* flevel = compaction->input_levels(input_level);
|
|
2020
|
+
size_t num_input_files = flevel->num_files;
|
|
2015
2021
|
uint64_t* bytes_read;
|
|
2016
2022
|
if (compaction->level(input_level) != compaction->output_level()) {
|
|
2017
2023
|
compaction_stats_.stats.num_input_files_in_non_output_levels +=
|
|
@@ -2023,7 +2029,7 @@ bool CompactionJob::UpdateCompactionStats(uint64_t* num_input_range_del) {
|
|
|
2023
2029
|
bytes_read = &compaction_stats_.stats.bytes_read_output_level;
|
|
2024
2030
|
}
|
|
2025
2031
|
for (size_t i = 0; i < num_input_files; ++i) {
|
|
2026
|
-
const FileMetaData* file_meta =
|
|
2032
|
+
const FileMetaData* file_meta = flevel->files[i].file_metadata;
|
|
2027
2033
|
*bytes_read += file_meta->fd.GetFileSize();
|
|
2028
2034
|
uint64_t file_input_entries = file_meta->num_entries;
|
|
2029
2035
|
uint64_t file_num_range_del = file_meta->num_range_deletions;
|
|
@@ -2046,6 +2052,23 @@ bool CompactionJob::UpdateCompactionStats(uint64_t* num_input_range_del) {
|
|
|
2046
2052
|
*num_input_range_del += file_num_range_del;
|
|
2047
2053
|
}
|
|
2048
2054
|
}
|
|
2055
|
+
|
|
2056
|
+
const std::vector<FileMetaData*>& filtered_flevel =
|
|
2057
|
+
compaction->filtered_input_levels(input_level);
|
|
2058
|
+
size_t num_filtered_input_files = filtered_flevel.size();
|
|
2059
|
+
uint64_t* bytes_skipped;
|
|
2060
|
+
if (compaction->level(input_level) != compaction->output_level()) {
|
|
2061
|
+
compaction_stats_.stats.num_filtered_input_files_in_non_output_levels +=
|
|
2062
|
+
static_cast<int>(num_filtered_input_files);
|
|
2063
|
+
bytes_skipped = &compaction_stats_.stats.bytes_skipped_non_output_levels;
|
|
2064
|
+
} else {
|
|
2065
|
+
compaction_stats_.stats.num_filtered_input_files_in_output_level +=
|
|
2066
|
+
static_cast<int>(num_filtered_input_files);
|
|
2067
|
+
bytes_skipped = &compaction_stats_.stats.bytes_skipped_output_level;
|
|
2068
|
+
}
|
|
2069
|
+
for (const FileMetaData* filtered_file_meta : filtered_flevel) {
|
|
2070
|
+
*bytes_skipped += filtered_file_meta->fd.GetFileSize();
|
|
2071
|
+
}
|
|
2049
2072
|
}
|
|
2050
2073
|
|
|
2051
2074
|
assert(compaction_job_stats_);
|
|
@@ -2070,6 +2093,13 @@ void CompactionJob::UpdateCompactionJobStats(
|
|
|
2070
2093
|
stats.num_input_files_in_output_level;
|
|
2071
2094
|
compaction_job_stats_->num_input_files_at_output_level =
|
|
2072
2095
|
stats.num_input_files_in_output_level;
|
|
2096
|
+
compaction_job_stats_->num_filtered_input_files =
|
|
2097
|
+
stats.num_filtered_input_files_in_non_output_levels +
|
|
2098
|
+
stats.num_filtered_input_files_in_output_level;
|
|
2099
|
+
compaction_job_stats_->num_filtered_input_files_at_output_level =
|
|
2100
|
+
stats.num_filtered_input_files_in_output_level;
|
|
2101
|
+
compaction_job_stats_->total_skipped_input_bytes =
|
|
2102
|
+
stats.bytes_skipped_non_output_levels + stats.bytes_skipped_output_level;
|
|
2073
2103
|
|
|
2074
2104
|
// output information
|
|
2075
2105
|
compaction_job_stats_->total_output_bytes = stats.bytes_written;
|
|
@@ -215,8 +215,7 @@ class CompactionJob {
|
|
|
215
215
|
virtual void RecordCompactionIOStats();
|
|
216
216
|
void CleanupCompaction();
|
|
217
217
|
|
|
218
|
-
//
|
|
219
|
-
// kv-pairs
|
|
218
|
+
// Iterate through input and compact the kv-pairs.
|
|
220
219
|
void ProcessKeyValueCompaction(SubcompactionState* sub_compact);
|
|
221
220
|
|
|
222
221
|
CompactionState* compact_;
|
|
@@ -386,7 +385,7 @@ struct CompactionServiceInput {
|
|
|
386
385
|
// files needed for this compaction, for both input level files and output
|
|
387
386
|
// level files.
|
|
388
387
|
std::vector<std::string> input_files;
|
|
389
|
-
int output_level;
|
|
388
|
+
int output_level = 0;
|
|
390
389
|
|
|
391
390
|
// db_id is used to generate unique id of sst on the remote compactor
|
|
392
391
|
std::string db_id;
|
|
@@ -397,7 +396,7 @@ struct CompactionServiceInput {
|
|
|
397
396
|
bool has_end = false;
|
|
398
397
|
std::string end;
|
|
399
398
|
|
|
400
|
-
uint64_t options_file_number;
|
|
399
|
+
uint64_t options_file_number = 0;
|
|
401
400
|
|
|
402
401
|
// serialization interface to read and write the object
|
|
403
402
|
static Status Read(const std::string& data_str, CompactionServiceInput* obj);
|
|
@@ -424,6 +423,7 @@ struct CompactionServiceOutputFile {
|
|
|
424
423
|
uint64_t paranoid_hash;
|
|
425
424
|
bool marked_for_compaction;
|
|
426
425
|
UniqueId64x2 unique_id{};
|
|
426
|
+
TableProperties table_properties;
|
|
427
427
|
|
|
428
428
|
CompactionServiceOutputFile() = default;
|
|
429
429
|
CompactionServiceOutputFile(
|
|
@@ -432,7 +432,8 @@ struct CompactionServiceOutputFile {
|
|
|
432
432
|
uint64_t _oldest_ancester_time, uint64_t _file_creation_time,
|
|
433
433
|
uint64_t _epoch_number, const std::string& _file_checksum,
|
|
434
434
|
const std::string& _file_checksum_func_name, uint64_t _paranoid_hash,
|
|
435
|
-
bool _marked_for_compaction, UniqueId64x2 _unique_id
|
|
435
|
+
bool _marked_for_compaction, UniqueId64x2 _unique_id,
|
|
436
|
+
const TableProperties& _table_properties)
|
|
436
437
|
: file_name(name),
|
|
437
438
|
smallest_seqno(smallest),
|
|
438
439
|
largest_seqno(largest),
|
|
@@ -445,7 +446,8 @@ struct CompactionServiceOutputFile {
|
|
|
445
446
|
file_checksum_func_name(_file_checksum_func_name),
|
|
446
447
|
paranoid_hash(_paranoid_hash),
|
|
447
448
|
marked_for_compaction(_marked_for_compaction),
|
|
448
|
-
unique_id(std::move(_unique_id))
|
|
449
|
+
unique_id(std::move(_unique_id)),
|
|
450
|
+
table_properties(_table_properties) {}
|
|
449
451
|
};
|
|
450
452
|
|
|
451
453
|
// CompactionServiceResult contains the compaction result from a different db
|
|
@@ -454,7 +456,7 @@ struct CompactionServiceOutputFile {
|
|
|
454
456
|
struct CompactionServiceResult {
|
|
455
457
|
Status status;
|
|
456
458
|
std::vector<CompactionServiceOutputFile> output_files;
|
|
457
|
-
int output_level;
|
|
459
|
+
int output_level = 0;
|
|
458
460
|
|
|
459
461
|
// location of the output files
|
|
460
462
|
std::string output_path;
|
|
@@ -250,6 +250,7 @@ class CompactionJobTestBase : public testing::Test {
|
|
|
250
250
|
} else {
|
|
251
251
|
assert(false);
|
|
252
252
|
}
|
|
253
|
+
mutable_cf_options_.table_factory = cf_options_.table_factory;
|
|
253
254
|
}
|
|
254
255
|
|
|
255
256
|
std::string GenerateFileName(uint64_t file_number) {
|
|
@@ -300,13 +301,13 @@ class CompactionJobTestBase : public testing::Test {
|
|
|
300
301
|
const WriteOptions write_options;
|
|
301
302
|
std::unique_ptr<TableBuilder> table_builder(
|
|
302
303
|
cf_options_.table_factory->NewTableBuilder(
|
|
303
|
-
TableBuilderOptions(
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
304
|
+
TableBuilderOptions(
|
|
305
|
+
*cfd_->ioptions(), mutable_cf_options_, read_options,
|
|
306
|
+
write_options, cfd_->internal_comparator(),
|
|
307
|
+
cfd_->internal_tbl_prop_coll_factories(),
|
|
308
|
+
CompressionType::kNoCompression, CompressionOptions(),
|
|
309
|
+
0 /* column_family_id */, kDefaultColumnFamilyName,
|
|
310
|
+
-1 /* level */, kUnknownNewestKeyTime),
|
|
310
311
|
file_writer.get()));
|
|
311
312
|
// Build table.
|
|
312
313
|
for (const auto& kv : contents) {
|
|
@@ -651,7 +652,8 @@ class CompactionJobTestBase : public testing::Test {
|
|
|
651
652
|
mutable_cf_options_.target_file_size_base,
|
|
652
653
|
mutable_cf_options_.max_compaction_bytes, 0, kNoCompression,
|
|
653
654
|
cfd->GetLatestMutableCFOptions()->compression_opts,
|
|
654
|
-
Temperature::kUnknown, max_subcompactions, grandparents,
|
|
655
|
+
Temperature::kUnknown, max_subcompactions, grandparents,
|
|
656
|
+
/*earliest_snapshot*/ std::nullopt, /*snapshot_checker*/ nullptr, true);
|
|
655
657
|
compaction.FinalizeInputInfo(cfd->current());
|
|
656
658
|
|
|
657
659
|
assert(db_options_.info_log);
|
|
@@ -1660,6 +1662,16 @@ TEST_F(CompactionJobTest, ResultSerialization) {
|
|
|
1660
1662
|
std::string file_checksum = rnd.RandomBinaryString(rnd.Uniform(kStrMaxLen));
|
|
1661
1663
|
std::string file_checksum_func_name = "MyAwesomeChecksumGenerator";
|
|
1662
1664
|
while (!rnd.OneIn(10)) {
|
|
1665
|
+
TableProperties tp;
|
|
1666
|
+
tp.user_collected_properties.emplace(
|
|
1667
|
+
"UCP_Key1", rnd.RandomString(rnd.Uniform(kStrMaxLen)));
|
|
1668
|
+
tp.user_collected_properties.emplace(
|
|
1669
|
+
"UCP_Key2", rnd.RandomString(rnd.Uniform(kStrMaxLen)));
|
|
1670
|
+
tp.readable_properties.emplace("RP_Key1",
|
|
1671
|
+
rnd.RandomString(rnd.Uniform(kStrMaxLen)));
|
|
1672
|
+
tp.readable_properties.emplace("RP_K2y2",
|
|
1673
|
+
rnd.RandomString(rnd.Uniform(kStrMaxLen)));
|
|
1674
|
+
|
|
1663
1675
|
UniqueId64x2 id{rnd64.Uniform(UINT64_MAX), rnd64.Uniform(UINT64_MAX)};
|
|
1664
1676
|
result.output_files.emplace_back(
|
|
1665
1677
|
rnd.RandomString(rnd.Uniform(kStrMaxLen)) /* file_name */,
|
|
@@ -1675,7 +1687,7 @@ TEST_F(CompactionJobTest, ResultSerialization) {
|
|
|
1675
1687
|
file_checksum /* file_checksum */,
|
|
1676
1688
|
file_checksum_func_name /* file_checksum_func_name */,
|
|
1677
1689
|
rnd64.Uniform(UINT64_MAX) /* paranoid_hash */,
|
|
1678
|
-
rnd.OneIn(2) /* marked_for_compaction */, id);
|
|
1690
|
+
rnd.OneIn(2) /* marked_for_compaction */, id /* unique_id */, tp);
|
|
1679
1691
|
}
|
|
1680
1692
|
result.output_level = rnd.Uniform(10);
|
|
1681
1693
|
result.output_path = rnd.RandomString(rnd.Uniform(kStrMaxLen));
|
|
@@ -1696,6 +1708,21 @@ TEST_F(CompactionJobTest, ResultSerialization) {
|
|
|
1696
1708
|
ASSERT_OK(CompactionServiceResult::Read(output, &deserialized1));
|
|
1697
1709
|
ASSERT_TRUE(deserialized1.TEST_Equals(&result));
|
|
1698
1710
|
|
|
1711
|
+
for (size_t i = 0; i < result.output_files.size(); i++) {
|
|
1712
|
+
for (const auto& prop :
|
|
1713
|
+
result.output_files[i].table_properties.user_collected_properties) {
|
|
1714
|
+
ASSERT_EQ(deserialized1.output_files[i]
|
|
1715
|
+
.table_properties.user_collected_properties[prop.first],
|
|
1716
|
+
prop.second);
|
|
1717
|
+
}
|
|
1718
|
+
for (const auto& prop :
|
|
1719
|
+
result.output_files[i].table_properties.readable_properties) {
|
|
1720
|
+
ASSERT_EQ(deserialized1.output_files[i]
|
|
1721
|
+
.table_properties.readable_properties[prop.first],
|
|
1722
|
+
prop.second);
|
|
1723
|
+
}
|
|
1724
|
+
}
|
|
1725
|
+
|
|
1699
1726
|
// Test mismatch
|
|
1700
1727
|
deserialized1.stats.num_input_files += 10;
|
|
1701
1728
|
std::string mismatch;
|
|
@@ -108,6 +108,12 @@ class CompactionOutputs {
|
|
|
108
108
|
Status Finish(const Status& intput_status,
|
|
109
109
|
const SeqnoToTimeMapping& seqno_to_time_mapping);
|
|
110
110
|
|
|
111
|
+
// Update output table properties from already populated TableProperties.
|
|
112
|
+
// Used for remote compaction
|
|
113
|
+
void UpdateTableProperties(const TableProperties& table_properties) {
|
|
114
|
+
current_output().table_properties =
|
|
115
|
+
std::make_shared<TableProperties>(table_properties);
|
|
116
|
+
}
|
|
111
117
|
// Update output table properties from table builder
|
|
112
118
|
void UpdateTableProperties() {
|
|
113
119
|
current_output().table_properties =
|
|
@@ -351,11 +351,11 @@ Compaction* CompactionPicker::CompactFiles(
|
|
|
351
351
|
break;
|
|
352
352
|
}
|
|
353
353
|
}
|
|
354
|
-
assert(output_level == 0 ||
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
354
|
+
assert(output_level == 0 || !FilesRangeOverlapWithCompaction(
|
|
355
|
+
input_files, output_level,
|
|
356
|
+
Compaction::EvaluatePenultimateLevel(
|
|
357
|
+
vstorage, mutable_cf_options, ioptions_,
|
|
358
|
+
start_level, output_level)));
|
|
359
359
|
#endif /* !NDEBUG */
|
|
360
360
|
|
|
361
361
|
CompressionType compression_type;
|
|
@@ -380,7 +380,8 @@ Compaction* CompactionPicker::CompactFiles(
|
|
|
380
380
|
GetCompressionOptions(mutable_cf_options, vstorage, output_level),
|
|
381
381
|
mutable_cf_options.default_write_temperature,
|
|
382
382
|
compact_options.max_subcompactions,
|
|
383
|
-
/* grandparents */ {},
|
|
383
|
+
/* grandparents */ {}, /* earliest_snapshot */ std::nullopt,
|
|
384
|
+
/* snapshot_checker */ nullptr, true);
|
|
384
385
|
RegisterCompaction(c);
|
|
385
386
|
return c;
|
|
386
387
|
}
|
|
@@ -658,8 +659,9 @@ Compaction* CompactionPicker::CompactRange(
|
|
|
658
659
|
// overlaping outputs in the same level.
|
|
659
660
|
if (FilesRangeOverlapWithCompaction(
|
|
660
661
|
inputs, output_level,
|
|
661
|
-
Compaction::EvaluatePenultimateLevel(vstorage,
|
|
662
|
-
start_level,
|
|
662
|
+
Compaction::EvaluatePenultimateLevel(vstorage, mutable_cf_options,
|
|
663
|
+
ioptions_, start_level,
|
|
664
|
+
output_level))) {
|
|
663
665
|
// This compaction output could potentially conflict with the output
|
|
664
666
|
// of a currently running compaction, we cannot run it.
|
|
665
667
|
*manual_conflict = true;
|
|
@@ -677,7 +679,9 @@ Compaction* CompactionPicker::CompactRange(
|
|
|
677
679
|
GetCompressionOptions(mutable_cf_options, vstorage, output_level),
|
|
678
680
|
mutable_cf_options.default_write_temperature,
|
|
679
681
|
compact_range_options.max_subcompactions,
|
|
680
|
-
/* grandparents */ {}, /*
|
|
682
|
+
/* grandparents */ {}, /* earliest_snapshot */ std::nullopt,
|
|
683
|
+
/* snapshot_checker */ nullptr,
|
|
684
|
+
/* is manual */ true, trim_ts, /* score */ -1,
|
|
681
685
|
/* deletion_compaction */ false, /* l0_files_might_overlap */ true,
|
|
682
686
|
CompactionReason::kUnknown,
|
|
683
687
|
compact_range_options.blob_garbage_collection_policy,
|
|
@@ -843,7 +847,8 @@ Compaction* CompactionPicker::CompactRange(
|
|
|
843
847
|
// overlaping outputs in the same level.
|
|
844
848
|
if (FilesRangeOverlapWithCompaction(
|
|
845
849
|
compaction_inputs, output_level,
|
|
846
|
-
Compaction::EvaluatePenultimateLevel(vstorage,
|
|
850
|
+
Compaction::EvaluatePenultimateLevel(vstorage, mutable_cf_options,
|
|
851
|
+
ioptions_, input_level,
|
|
847
852
|
output_level))) {
|
|
848
853
|
// This compaction output could potentially conflict with the output
|
|
849
854
|
// of a currently running compaction, we cannot run it.
|
|
@@ -866,6 +871,7 @@ Compaction* CompactionPicker::CompactRange(
|
|
|
866
871
|
GetCompressionOptions(mutable_cf_options, vstorage, output_level),
|
|
867
872
|
mutable_cf_options.default_write_temperature,
|
|
868
873
|
compact_range_options.max_subcompactions, std::move(grandparents),
|
|
874
|
+
/* earliest_snapshot */ std::nullopt, /* snapshot_checker */ nullptr,
|
|
869
875
|
/* is manual */ true, trim_ts, /* score */ -1,
|
|
870
876
|
/* deletion_compaction */ false, /* l0_files_might_overlap */ true,
|
|
871
877
|
CompactionReason::kUnknown,
|
|
@@ -1045,10 +1051,12 @@ Status CompactionPicker::SanitizeCompactionInputFilesForAllLevels(
|
|
|
1045
1051
|
}
|
|
1046
1052
|
|
|
1047
1053
|
Status CompactionPicker::SanitizeAndConvertCompactionInputFiles(
|
|
1048
|
-
std::unordered_set<uint64_t>* input_files,
|
|
1049
|
-
|
|
1050
|
-
const VersionStorageInfo* vstorage,
|
|
1054
|
+
std::unordered_set<uint64_t>* input_files, const int output_level,
|
|
1055
|
+
Version* version,
|
|
1051
1056
|
std::vector<CompactionInputFiles>* converted_input_files) const {
|
|
1057
|
+
ColumnFamilyMetaData cf_meta;
|
|
1058
|
+
version->GetColumnFamilyMetaData(&cf_meta);
|
|
1059
|
+
|
|
1052
1060
|
assert(static_cast<int>(cf_meta.levels.size()) - 1 ==
|
|
1053
1061
|
cf_meta.levels[cf_meta.levels.size() - 1].level);
|
|
1054
1062
|
assert(converted_input_files);
|
|
@@ -1119,7 +1127,8 @@ Status CompactionPicker::SanitizeAndConvertCompactionInputFiles(
|
|
|
1119
1127
|
}
|
|
1120
1128
|
|
|
1121
1129
|
s = GetCompactionInputsFromFileNumbers(converted_input_files, input_files,
|
|
1122
|
-
|
|
1130
|
+
version->storage_info(),
|
|
1131
|
+
CompactionOptions());
|
|
1123
1132
|
if (!s.ok()) {
|
|
1124
1133
|
return s;
|
|
1125
1134
|
}
|
|
@@ -1128,8 +1137,8 @@ Status CompactionPicker::SanitizeAndConvertCompactionInputFiles(
|
|
|
1128
1137
|
FilesRangeOverlapWithCompaction(
|
|
1129
1138
|
*converted_input_files, output_level,
|
|
1130
1139
|
Compaction::EvaluatePenultimateLevel(
|
|
1131
|
-
|
|
1132
|
-
output_level))) {
|
|
1140
|
+
version->storage_info(), version->GetMutableCFOptions(),
|
|
1141
|
+
ioptions_, (*converted_input_files)[0].level, output_level))) {
|
|
1133
1142
|
return Status::Aborted(
|
|
1134
1143
|
"A running compaction is writing to the same output level(s) in an "
|
|
1135
1144
|
"overlapping key range");
|
|
@@ -1171,7 +1180,8 @@ void CompactionPicker::UnregisterCompaction(Compaction* c) {
|
|
|
1171
1180
|
|
|
1172
1181
|
void CompactionPicker::PickFilesMarkedForCompaction(
|
|
1173
1182
|
const std::string& cf_name, VersionStorageInfo* vstorage, int* start_level,
|
|
1174
|
-
int* output_level, CompactionInputFiles* start_level_inputs
|
|
1183
|
+
int* output_level, CompactionInputFiles* start_level_inputs,
|
|
1184
|
+
std::function<bool(const FileMetaData*)> skip_marked_file) {
|
|
1175
1185
|
if (vstorage->FilesMarkedForCompaction().empty()) {
|
|
1176
1186
|
return;
|
|
1177
1187
|
}
|
|
@@ -1181,6 +1191,9 @@ void CompactionPicker::PickFilesMarkedForCompaction(
|
|
|
1181
1191
|
// If this assert() fails that means that some function marked some
|
|
1182
1192
|
// files as being_compacted, but didn't call ComputeCompactionScore()
|
|
1183
1193
|
assert(!level_file.second->being_compacted);
|
|
1194
|
+
if (skip_marked_file(level_file.second)) {
|
|
1195
|
+
return false;
|
|
1196
|
+
}
|
|
1184
1197
|
*start_level = level_file.first;
|
|
1185
1198
|
*output_level =
|
|
1186
1199
|
(*start_level == 0) ? vstorage->base_level() : *start_level + 1;
|