@nxtedition/rocksdb 13.1.4 → 13.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +43 -16
- package/deps/rocksdb/rocksdb/{TARGETS → BUCK} +27 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +3 -1
- package/deps/rocksdb/rocksdb/Makefile +2 -2
- package/deps/rocksdb/rocksdb/cache/cache.cc +3 -1
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +2 -0
- package/deps/rocksdb/rocksdb/db/attribute_group_iterator_impl.h +34 -9
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +7 -6
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +22 -14
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +149 -0
- package/deps/rocksdb/rocksdb/db/builder.cc +13 -24
- package/deps/rocksdb/rocksdb/db/coalescing_iterator.h +35 -10
- package/deps/rocksdb/rocksdb/db/column_family.cc +21 -10
- package/deps/rocksdb/rocksdb/db/column_family.h +15 -8
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +98 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +126 -16
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +51 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +24 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +52 -22
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +9 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +36 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +6 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +30 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +26 -23
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +43 -33
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +6 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +19 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +6 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +632 -411
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +171 -51
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +7 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +37 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +51 -11
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +10 -3
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +350 -154
- package/deps/rocksdb/rocksdb/db/convenience.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +62 -27
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +68 -1
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +91 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +134 -70
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +71 -23
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +43 -16
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +47 -33
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +27 -19
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +38 -25
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +7 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +258 -42
- package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +161 -9
- package/deps/rocksdb/rocksdb/db/db_iter.cc +118 -86
- package/deps/rocksdb/rocksdb/db/db_iter.h +44 -17
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +27 -6
- package/deps/rocksdb/rocksdb/db/db_test.cc +48 -16
- package/deps/rocksdb/rocksdb/db/db_test2.cc +60 -15
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +97 -44
- package/deps/rocksdb/rocksdb/db/db_test_util.h +7 -1
- package/deps/rocksdb/rocksdb/db/dbformat.cc +15 -5
- package/deps/rocksdb/rocksdb/db/dbformat.h +137 -55
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +54 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +663 -8
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +152 -91
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +134 -11
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +55 -9
- package/deps/rocksdb/rocksdb/db/flush_job.cc +52 -29
- package/deps/rocksdb/rocksdb/db/flush_job.h +5 -3
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +18 -12
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +23 -29
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +3 -2
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +9 -6
- package/deps/rocksdb/rocksdb/db/internal_stats.h +54 -0
- package/deps/rocksdb/rocksdb/db/job_context.h +1 -1
- package/deps/rocksdb/rocksdb/db/log_reader.cc +6 -7
- package/deps/rocksdb/rocksdb/db/manifest_ops.cc +47 -0
- package/deps/rocksdb/rocksdb/db/manifest_ops.h +20 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +165 -64
- package/deps/rocksdb/rocksdb/db/memtable.h +422 -243
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +99 -68
- package/deps/rocksdb/rocksdb/db/memtable_list.h +63 -38
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +28 -25
- package/deps/rocksdb/rocksdb/db/multi_cf_iterator_impl.h +118 -60
- package/deps/rocksdb/rocksdb/db/multi_cf_iterator_test.cc +344 -89
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +2 -3
- package/deps/rocksdb/rocksdb/db/repair.cc +15 -14
- package/deps/rocksdb/rocksdb/db/repair_test.cc +0 -13
- package/deps/rocksdb/rocksdb/db/snapshot_checker.h +7 -0
- package/deps/rocksdb/rocksdb/db/table_cache.cc +62 -65
- package/deps/rocksdb/rocksdb/db/table_cache.h +70 -76
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +5 -6
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +8 -7
- package/deps/rocksdb/rocksdb/db/version_builder.cc +17 -19
- package/deps/rocksdb/rocksdb/db/version_builder.h +13 -12
- package/deps/rocksdb/rocksdb/db/version_edit.h +30 -0
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +3 -5
- package/deps/rocksdb/rocksdb/db/version_set.cc +89 -129
- package/deps/rocksdb/rocksdb/db/version_set.h +12 -4
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -2
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +12 -8
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +0 -15
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +0 -2
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +9 -7
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +0 -8
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.h +28 -2
- package/deps/rocksdb/rocksdb/db/write_batch.cc +32 -10
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +9 -0
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/write_thread.cc +3 -1
- package/deps/rocksdb/rocksdb/db/write_thread.h +6 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +15 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +7 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +18 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +100 -22
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +34 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +223 -78
- package/deps/rocksdb/rocksdb/env/file_system.cc +6 -1
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +53 -0
- package/deps/rocksdb/rocksdb/env/io_posix.cc +63 -17
- package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +132 -48
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +92 -24
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +727 -109
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +3 -4
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/attribute_groups.h +20 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +9 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +10 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +34 -37
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator_base.h +21 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +56 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +36 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +11 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +84 -60
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/secondary_index.h +102 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +89 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +32 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +30 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +23 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -0
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +79 -21
- package/deps/rocksdb/rocksdb/memtable/skiplist.h +41 -18
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +1 -5
- package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.cc +169 -0
- package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.h +400 -0
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +137 -82
- package/deps/rocksdb/rocksdb/options/cf_options.h +18 -6
- package/deps/rocksdb/rocksdb/options/configurable.cc +31 -17
- package/deps/rocksdb/rocksdb/options/configurable_helper.h +7 -6
- package/deps/rocksdb/rocksdb/options/options_helper.cc +10 -8
- package/deps/rocksdb/rocksdb/options/options_parser.cc +74 -54
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +89 -0
- package/deps/rocksdb/rocksdb/options/options_test.cc +112 -26
- package/deps/rocksdb/rocksdb/port/port.h +5 -9
- package/deps/rocksdb/rocksdb/src.mk +8 -0
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +4 -0
- package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +62 -80
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +13 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +16 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +38 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +12 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +204 -1
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +4 -0
- package/deps/rocksdb/rocksdb/table/format.cc +3 -3
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +4 -1
- package/deps/rocksdb/rocksdb/table/mock_table.cc +0 -50
- package/deps/rocksdb/rocksdb/table/mock_table.h +53 -0
- package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +4 -0
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +10 -5
- package/deps/rocksdb/rocksdb/table/table_builder.h +3 -1
- package/deps/rocksdb/rocksdb/table/table_properties.cc +181 -0
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +5 -5
- package/deps/rocksdb/rocksdb/table/table_test.cc +71 -64
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +45 -45
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +35 -35
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +43 -43
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +41 -4
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +1 -1
- package/deps/rocksdb/rocksdb/unreleased_history/add.sh +13 -0
- package/deps/rocksdb/rocksdb/util/aligned_buffer.h +24 -5
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +7 -0
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +0 -52
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +1 -10
- package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +92 -0
- package/deps/rocksdb/rocksdb/util/thread_operation.h +1 -0
- package/deps/rocksdb/rocksdb/util/udt_util.cc +50 -4
- package/deps/rocksdb/rocksdb/util/udt_util.h +24 -11
- package/deps/rocksdb/rocksdb/util/udt_util_test.cc +26 -13
- package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +1 -16
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.cc +214 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.h +60 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index_test.cc +124 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/secondary_index_mixin.h +441 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +34 -3
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +7 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +437 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +34 -11
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +14 -7
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +7 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/snapshot_checker.cc +17 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +69 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +20 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +1290 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +324 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +8 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +57 -12
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +32 -3
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +33 -2
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +721 -9
- package/deps/rocksdb/rocksdb.gyp +2 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
|
@@ -67,7 +67,6 @@ Status ExternalSstFileIngestionJob::Prepare(
|
|
|
67
67
|
files_to_ingest_.emplace_back(std::move(file_to_ingest));
|
|
68
68
|
}
|
|
69
69
|
|
|
70
|
-
const Comparator* ucmp = cfd_->internal_comparator().user_comparator();
|
|
71
70
|
auto num_files = files_to_ingest_.size();
|
|
72
71
|
if (num_files == 0) {
|
|
73
72
|
return Status::InvalidArgument("The list of files is empty");
|
|
@@ -78,16 +77,12 @@ Status ExternalSstFileIngestionJob::Prepare(
|
|
|
78
77
|
sorted_files.push_back(&files_to_ingest_[i]);
|
|
79
78
|
}
|
|
80
79
|
|
|
81
|
-
std::sort(
|
|
82
|
-
sorted_files.begin(), sorted_files.end(),
|
|
83
|
-
[&ucmp](const IngestedFileInfo* info1, const IngestedFileInfo* info2) {
|
|
84
|
-
return sstableKeyCompare(ucmp, info1->smallest_internal_key,
|
|
85
|
-
info2->smallest_internal_key) < 0;
|
|
86
|
-
});
|
|
80
|
+
std::sort(sorted_files.begin(), sorted_files.end(), file_range_checker_);
|
|
87
81
|
|
|
88
82
|
for (size_t i = 0; i + 1 < num_files; i++) {
|
|
89
|
-
if (
|
|
90
|
-
|
|
83
|
+
if (file_range_checker_.OverlapsWithPrev(sorted_files[i],
|
|
84
|
+
sorted_files[i + 1],
|
|
85
|
+
/* ranges_sorted= */ true)) {
|
|
91
86
|
files_overlap_ = true;
|
|
92
87
|
break;
|
|
93
88
|
}
|
|
@@ -100,7 +95,15 @@ Status ExternalSstFileIngestionJob::Prepare(
|
|
|
100
95
|
"behind mode.");
|
|
101
96
|
}
|
|
102
97
|
|
|
103
|
-
|
|
98
|
+
// Overlapping files need at least two different sequence numbers. If settings
|
|
99
|
+
// disables global seqno, ingestion will fail anyway, so fail fast in prepare.
|
|
100
|
+
if (!ingestion_options_.allow_global_seqno && files_overlap_) {
|
|
101
|
+
return Status::InvalidArgument(
|
|
102
|
+
"Global seqno is required, but disabled (because external files key "
|
|
103
|
+
"range overlaps).");
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
if (ucmp_->timestamp_size() > 0 && files_overlap_) {
|
|
104
107
|
return Status::NotSupported(
|
|
105
108
|
"Files with overlapping ranges cannot be ingested to column "
|
|
106
109
|
"family with user-defined timestamp enabled.");
|
|
@@ -336,9 +339,35 @@ Status ExternalSstFileIngestionJob::Prepare(
|
|
|
336
339
|
}
|
|
337
340
|
}
|
|
338
341
|
|
|
342
|
+
if (status.ok()) {
|
|
343
|
+
DivideInputFilesIntoBatches();
|
|
344
|
+
}
|
|
345
|
+
|
|
339
346
|
return status;
|
|
340
347
|
}
|
|
341
348
|
|
|
349
|
+
void ExternalSstFileIngestionJob::DivideInputFilesIntoBatches() {
|
|
350
|
+
if (!files_overlap_) {
|
|
351
|
+
// No overlap, treat as one batch without the need of tracking overall batch
|
|
352
|
+
// range.
|
|
353
|
+
file_batches_to_ingest_.emplace_back(/* _track_batch_range= */ false);
|
|
354
|
+
for (auto& file : files_to_ingest_) {
|
|
355
|
+
file_batches_to_ingest_.back().AddFile(&file, file_range_checker_);
|
|
356
|
+
}
|
|
357
|
+
return;
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
file_batches_to_ingest_.emplace_back(/* _track_batch_range= */ true);
|
|
361
|
+
for (auto& file : files_to_ingest_) {
|
|
362
|
+
if (file_range_checker_.OverlapsWithPrev(&file_batches_to_ingest_.back(),
|
|
363
|
+
&file,
|
|
364
|
+
/* ranges_sorted= */ false)) {
|
|
365
|
+
file_batches_to_ingest_.emplace_back(/* _track_batch_range= */ true);
|
|
366
|
+
}
|
|
367
|
+
file_batches_to_ingest_.back().AddFile(&file, file_range_checker_);
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
|
|
342
371
|
Status ExternalSstFileIngestionJob::NeedsFlush(bool* flush_needed,
|
|
343
372
|
SuperVersion* super_version) {
|
|
344
373
|
size_t n = files_to_ingest_.size();
|
|
@@ -353,9 +382,7 @@ Status ExternalSstFileIngestionJob::NeedsFlush(bool* flush_needed,
|
|
|
353
382
|
if (!ingestion_options_.allow_blocking_flush) {
|
|
354
383
|
status = Status::InvalidArgument("External file requires flush");
|
|
355
384
|
}
|
|
356
|
-
|
|
357
|
-
assert(ucmp);
|
|
358
|
-
if (ucmp->timestamp_size() > 0) {
|
|
385
|
+
if (ucmp_->timestamp_size() > 0) {
|
|
359
386
|
status = Status::InvalidArgument(
|
|
360
387
|
"Column family enables user-defined timestamps, please make "
|
|
361
388
|
"sure the key range (without timestamp) of external file does not "
|
|
@@ -368,8 +395,16 @@ Status ExternalSstFileIngestionJob::NeedsFlush(bool* flush_needed,
|
|
|
368
395
|
// REQUIRES: we have become the only writer by entering both write_thread_ and
|
|
369
396
|
// nonmem_write_thread_
|
|
370
397
|
Status ExternalSstFileIngestionJob::Run() {
|
|
371
|
-
Status status;
|
|
372
398
|
SuperVersion* super_version = cfd_->GetSuperVersion();
|
|
399
|
+
// If column family is flushed after Prepare and before Run, we should have a
|
|
400
|
+
// specific state of Memtables. The mutable Memtable should be empty, and the
|
|
401
|
+
// immutable Memtable list should be empty.
|
|
402
|
+
if (flushed_before_run_ && (super_version->imm->NumNotFlushed() != 0 ||
|
|
403
|
+
!super_version->mem->IsEmpty())) {
|
|
404
|
+
return Status::TryAgain(
|
|
405
|
+
"Inconsistent memtable state detected when flushed before run.");
|
|
406
|
+
}
|
|
407
|
+
Status status;
|
|
373
408
|
#ifndef NDEBUG
|
|
374
409
|
// We should never run the job with a memtable that is overlapping
|
|
375
410
|
// with the files we are ingesting
|
|
@@ -397,14 +432,39 @@ Status ExternalSstFileIngestionJob::Run() {
|
|
|
397
432
|
edit_.SetColumnFamily(cfd_->GetID());
|
|
398
433
|
// The levels that the files will be ingested into
|
|
399
434
|
|
|
400
|
-
|
|
435
|
+
std::optional<int> prev_batch_uppermost_level;
|
|
436
|
+
for (auto& batch : file_batches_to_ingest_) {
|
|
437
|
+
int batch_uppermost_level = 0;
|
|
438
|
+
status = AssignLevelsForOneBatch(batch, super_version, force_global_seqno,
|
|
439
|
+
&last_seqno, &batch_uppermost_level,
|
|
440
|
+
prev_batch_uppermost_level);
|
|
441
|
+
if (!status.ok()) {
|
|
442
|
+
return status;
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
prev_batch_uppermost_level = batch_uppermost_level;
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
CreateEquivalentFileIngestingCompactions();
|
|
449
|
+
return status;
|
|
450
|
+
}
|
|
451
|
+
|
|
452
|
+
Status ExternalSstFileIngestionJob::AssignLevelsForOneBatch(
|
|
453
|
+
FileBatchInfo& batch, SuperVersion* super_version, bool force_global_seqno,
|
|
454
|
+
SequenceNumber* last_seqno, int* batch_uppermost_level,
|
|
455
|
+
std::optional<int> prev_batch_uppermost_level) {
|
|
456
|
+
Status status;
|
|
457
|
+
assert(batch_uppermost_level);
|
|
458
|
+
*batch_uppermost_level = std::numeric_limits<int>::max();
|
|
459
|
+
for (IngestedFileInfo* file : batch.files) {
|
|
460
|
+
assert(file);
|
|
401
461
|
SequenceNumber assigned_seqno = 0;
|
|
402
462
|
if (ingestion_options_.ingest_behind) {
|
|
403
|
-
status = CheckLevelForIngestedBehindFile(
|
|
463
|
+
status = CheckLevelForIngestedBehindFile(file);
|
|
404
464
|
} else {
|
|
405
465
|
status = AssignLevelAndSeqnoForIngestedFile(
|
|
406
466
|
super_version, force_global_seqno, cfd_->ioptions()->compaction_style,
|
|
407
|
-
last_seqno,
|
|
467
|
+
*last_seqno, file, &assigned_seqno, prev_batch_uppermost_level);
|
|
408
468
|
}
|
|
409
469
|
|
|
410
470
|
// Modify the smallest/largest internal key to include the sequence number
|
|
@@ -413,38 +473,38 @@ Status ExternalSstFileIngestionJob::Run() {
|
|
|
413
473
|
// exclusive endpoint.
|
|
414
474
|
ParsedInternalKey smallest_parsed, largest_parsed;
|
|
415
475
|
if (status.ok()) {
|
|
416
|
-
status = ParseInternalKey(*
|
|
476
|
+
status = ParseInternalKey(*(file->smallest_internal_key.rep()),
|
|
417
477
|
&smallest_parsed, false /* log_err_key */);
|
|
418
478
|
}
|
|
419
479
|
if (status.ok()) {
|
|
420
|
-
status = ParseInternalKey(*
|
|
421
|
-
false /* log_err_key */);
|
|
480
|
+
status = ParseInternalKey(*(file->largest_internal_key.rep()),
|
|
481
|
+
&largest_parsed, false /* log_err_key */);
|
|
422
482
|
}
|
|
423
483
|
if (!status.ok()) {
|
|
424
484
|
return status;
|
|
425
485
|
}
|
|
426
486
|
if (smallest_parsed.sequence == 0 && assigned_seqno != 0) {
|
|
427
|
-
UpdateInternalKey(
|
|
487
|
+
UpdateInternalKey(file->smallest_internal_key.rep(), assigned_seqno,
|
|
428
488
|
smallest_parsed.type);
|
|
429
489
|
}
|
|
430
490
|
if (largest_parsed.sequence == 0 && assigned_seqno != 0) {
|
|
431
|
-
UpdateInternalKey(
|
|
491
|
+
UpdateInternalKey(file->largest_internal_key.rep(), assigned_seqno,
|
|
432
492
|
largest_parsed.type);
|
|
433
493
|
}
|
|
434
494
|
|
|
435
|
-
status = AssignGlobalSeqnoForIngestedFile(
|
|
495
|
+
status = AssignGlobalSeqnoForIngestedFile(file, assigned_seqno);
|
|
436
496
|
if (!status.ok()) {
|
|
437
497
|
return status;
|
|
438
498
|
}
|
|
439
499
|
TEST_SYNC_POINT_CALLBACK("ExternalSstFileIngestionJob::Run",
|
|
440
500
|
&assigned_seqno);
|
|
441
|
-
assert(assigned_seqno == 0 || assigned_seqno == last_seqno + 1);
|
|
442
|
-
if (assigned_seqno > last_seqno) {
|
|
443
|
-
last_seqno = assigned_seqno;
|
|
501
|
+
assert(assigned_seqno == 0 || assigned_seqno == *last_seqno + 1);
|
|
502
|
+
if (assigned_seqno > *last_seqno) {
|
|
503
|
+
*last_seqno = assigned_seqno;
|
|
444
504
|
++consumed_seqno_count_;
|
|
445
505
|
}
|
|
446
506
|
|
|
447
|
-
status = GenerateChecksumForIngestedFile(
|
|
507
|
+
status = GenerateChecksumForIngestedFile(file);
|
|
448
508
|
if (!status.ok()) {
|
|
449
509
|
return status;
|
|
450
510
|
}
|
|
@@ -459,31 +519,40 @@ Status ExternalSstFileIngestionJob::Run() {
|
|
|
459
519
|
static_cast<uint64_t>(temp_current_time);
|
|
460
520
|
}
|
|
461
521
|
uint64_t tail_size = 0;
|
|
462
|
-
bool contain_no_data_blocks =
|
|
463
|
-
(
|
|
464
|
-
|
|
465
|
-
if (
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
522
|
+
bool contain_no_data_blocks = file->table_properties.num_entries > 0 &&
|
|
523
|
+
(file->table_properties.num_entries ==
|
|
524
|
+
file->table_properties.num_range_deletions);
|
|
525
|
+
if (file->table_properties.tail_start_offset > 0 ||
|
|
526
|
+
contain_no_data_blocks) {
|
|
527
|
+
uint64_t file_size = file->fd.GetFileSize();
|
|
528
|
+
assert(file->table_properties.tail_start_offset <= file_size);
|
|
529
|
+
tail_size = file_size - file->table_properties.tail_start_offset;
|
|
469
530
|
}
|
|
470
531
|
|
|
532
|
+
bool marked_for_compaction =
|
|
533
|
+
file->table_properties.num_range_deletions == 1 &&
|
|
534
|
+
(file->table_properties.num_entries ==
|
|
535
|
+
file->table_properties.num_range_deletions);
|
|
471
536
|
FileMetaData f_metadata(
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
537
|
+
file->fd.GetNumber(), file->fd.GetPathId(), file->fd.GetFileSize(),
|
|
538
|
+
file->smallest_internal_key, file->largest_internal_key,
|
|
539
|
+
file->assigned_seqno, file->assigned_seqno, false,
|
|
540
|
+
file->file_temperature, kInvalidBlobFileNumber, oldest_ancester_time,
|
|
541
|
+
current_time,
|
|
476
542
|
ingestion_options_.ingest_behind
|
|
477
543
|
? kReservedEpochNumberForFileIngestedBehind
|
|
478
544
|
: cfd_->NewEpochNumber(),
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
f_metadata.temperature =
|
|
482
|
-
|
|
545
|
+
file->file_checksum, file->file_checksum_func_name, file->unique_id, 0,
|
|
546
|
+
tail_size, file->user_defined_timestamps_persisted);
|
|
547
|
+
f_metadata.temperature = file->file_temperature;
|
|
548
|
+
f_metadata.marked_for_compaction = marked_for_compaction;
|
|
549
|
+
edit_.AddFile(file->picked_level, f_metadata);
|
|
550
|
+
|
|
551
|
+
*batch_uppermost_level =
|
|
552
|
+
std::min(*batch_uppermost_level, file->picked_level);
|
|
483
553
|
}
|
|
484
554
|
|
|
485
|
-
|
|
486
|
-
return status;
|
|
555
|
+
return Status::OK();
|
|
487
556
|
}
|
|
488
557
|
|
|
489
558
|
void ExternalSstFileIngestionJob::CreateEquivalentFileIngestingCompactions() {
|
|
@@ -519,20 +588,17 @@ void ExternalSstFileIngestionJob::CreateEquivalentFileIngestingCompactions() {
|
|
|
519
588
|
file_ingesting_compactions_.push_back(new Compaction(
|
|
520
589
|
cfd_->current()->storage_info(), *cfd_->ioptions(), mutable_cf_options,
|
|
521
590
|
mutable_db_options_, {input}, output_level,
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
limit,
|
|
526
|
-
* not applicable
|
|
527
|
-
*/
|
|
528
|
-
,
|
|
591
|
+
/* output file size limit not applicable */
|
|
592
|
+
MaxFileSizeForLevel(mutable_cf_options, output_level,
|
|
593
|
+
cfd_->ioptions()->compaction_style),
|
|
529
594
|
LLONG_MAX /* max compaction bytes, not applicable */,
|
|
530
595
|
0 /* output path ID, not applicable */, mutable_cf_options.compression,
|
|
531
596
|
mutable_cf_options.compression_opts,
|
|
532
597
|
mutable_cf_options.default_write_temperature,
|
|
533
598
|
0 /* max_subcompaction, not applicable */,
|
|
534
|
-
{} /* grandparents, not applicable */,
|
|
535
|
-
|
|
599
|
+
{} /* grandparents, not applicable */,
|
|
600
|
+
std::nullopt /* earliest_snapshot */, nullptr /* snapshot_checker */,
|
|
601
|
+
false /* is manual */, "" /* trim_ts */, -1 /* score, not applicable */,
|
|
536
602
|
false /* is deletion compaction, not applicable */,
|
|
537
603
|
files_overlap_ /* l0_files_might_overlap, not applicable */,
|
|
538
604
|
CompactionReason::kExternalSstIngestion));
|
|
@@ -679,7 +745,10 @@ Status ExternalSstFileIngestionJob::ResetTableReader(
|
|
|
679
745
|
new RandomAccessFileReader(std::move(sst_file), external_file,
|
|
680
746
|
nullptr /*Env*/, io_tracer_));
|
|
681
747
|
table_reader->reset();
|
|
682
|
-
|
|
748
|
+
ReadOptions ro;
|
|
749
|
+
ro.fill_cache = ingestion_options_.fill_cache;
|
|
750
|
+
status = sv->mutable_cf_options.table_factory->NewTableReader(
|
|
751
|
+
ro,
|
|
683
752
|
TableReaderOptions(
|
|
684
753
|
*cfd_->ioptions(), sv->mutable_cf_options.prefix_extractor,
|
|
685
754
|
env_options_, cfd_->internal_comparator(),
|
|
@@ -691,7 +760,9 @@ Status ExternalSstFileIngestionJob::ResetTableReader(
|
|
|
691
760
|
/*cur_file_num*/ new_file_number,
|
|
692
761
|
/* unique_id */ {}, /* largest_seqno */ 0,
|
|
693
762
|
/* tail_size */ 0, user_defined_timestamps_persisted),
|
|
694
|
-
std::move(sst_file_reader), file_to_ingest->file_size, table_reader
|
|
763
|
+
std::move(sst_file_reader), file_to_ingest->file_size, table_reader,
|
|
764
|
+
// No need to prefetch index/filter if caching is not needed.
|
|
765
|
+
/*prefetch_index_and_filter_in_cache=*/ingestion_options_.fill_cache);
|
|
695
766
|
return status;
|
|
696
767
|
}
|
|
697
768
|
|
|
@@ -707,6 +778,7 @@ Status ExternalSstFileIngestionJob::SanityCheckTableProperties(
|
|
|
707
778
|
// Get table version
|
|
708
779
|
auto version_iter = uprops.find(ExternalSstFilePropertyNames::kVersion);
|
|
709
780
|
if (version_iter == uprops.end()) {
|
|
781
|
+
assert(!SstFileWriter::CreatedBySstFileWriter(*props));
|
|
710
782
|
if (!ingestion_options_.allow_db_generated_files) {
|
|
711
783
|
return Status::Corruption("External file version not found");
|
|
712
784
|
} else {
|
|
@@ -715,6 +787,7 @@ Status ExternalSstFileIngestionJob::SanityCheckTableProperties(
|
|
|
715
787
|
file_to_ingest->version = 0;
|
|
716
788
|
}
|
|
717
789
|
} else {
|
|
790
|
+
assert(SstFileWriter::CreatedBySstFileWriter(*props));
|
|
718
791
|
file_to_ingest->version = DecodeFixed32(version_iter->second.c_str());
|
|
719
792
|
}
|
|
720
793
|
|
|
@@ -787,9 +860,7 @@ Status ExternalSstFileIngestionJob::SanityCheckTableProperties(
|
|
|
787
860
|
// `TableReader` is initialized with `user_defined_timestamps_persisted` flag
|
|
788
861
|
// to be true. If its value changed to false after this sanity check, we
|
|
789
862
|
// need to reset the `TableReader`.
|
|
790
|
-
|
|
791
|
-
assert(ucmp);
|
|
792
|
-
if (ucmp->timestamp_size() > 0 &&
|
|
863
|
+
if (ucmp_->timestamp_size() > 0 &&
|
|
793
864
|
!file_to_ingest->user_defined_timestamps_persisted) {
|
|
794
865
|
s = ResetTableReader(external_file, new_file_number,
|
|
795
866
|
file_to_ingest->user_defined_timestamps_persisted, sv,
|
|
@@ -839,6 +910,7 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
|
|
|
839
910
|
// TODO: plumb Env::IOActivity, Env::IOPriority
|
|
840
911
|
ReadOptions ro;
|
|
841
912
|
ro.readahead_size = ingestion_options_.verify_checksums_readahead_size;
|
|
913
|
+
ro.fill_cache = ingestion_options_.fill_cache;
|
|
842
914
|
status = table_reader->VerifyChecksum(
|
|
843
915
|
ro, TableReaderCaller::kExternalSSTIngestion);
|
|
844
916
|
if (!status.ok()) {
|
|
@@ -849,16 +921,12 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
|
|
|
849
921
|
ParsedInternalKey key;
|
|
850
922
|
// TODO: plumb Env::IOActivity, Env::IOPriority
|
|
851
923
|
ReadOptions ro;
|
|
924
|
+
ro.fill_cache = ingestion_options_.fill_cache;
|
|
852
925
|
std::unique_ptr<InternalIterator> iter(table_reader->NewIterator(
|
|
853
926
|
ro, sv->mutable_cf_options.prefix_extractor.get(), /*arena=*/nullptr,
|
|
854
927
|
/*skip_filters=*/false, TableReaderCaller::kExternalSSTIngestion));
|
|
855
928
|
|
|
856
929
|
// Get first (smallest) and last (largest) key from file.
|
|
857
|
-
file_to_ingest->smallest_internal_key =
|
|
858
|
-
InternalKey("", 0, ValueType::kTypeValue);
|
|
859
|
-
file_to_ingest->largest_internal_key =
|
|
860
|
-
InternalKey("", 0, ValueType::kTypeValue);
|
|
861
|
-
bool bounds_set = false;
|
|
862
930
|
bool allow_data_in_errors = db_options_.allow_data_in_errors;
|
|
863
931
|
iter->SeekToFirst();
|
|
864
932
|
if (iter->Valid()) {
|
|
@@ -874,7 +942,8 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
|
|
|
874
942
|
file_to_ingest->smallest_internal_key.SetFrom(key);
|
|
875
943
|
|
|
876
944
|
Slice largest;
|
|
877
|
-
if (strcmp(
|
|
945
|
+
if (strcmp(sv->mutable_cf_options.table_factory->Name(), "PlainTable") ==
|
|
946
|
+
0) {
|
|
878
947
|
// PlainTable iterator does not support SeekToLast().
|
|
879
948
|
largest = iter->key();
|
|
880
949
|
for (; iter->Valid(); iter->Next()) {
|
|
@@ -908,8 +977,6 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
|
|
|
908
977
|
return Status::Corruption("External file has non zero sequence number");
|
|
909
978
|
}
|
|
910
979
|
file_to_ingest->largest_internal_key.SetFrom(key);
|
|
911
|
-
|
|
912
|
-
bounds_set = true;
|
|
913
980
|
} else if (!iter->status().ok()) {
|
|
914
981
|
return iter->status();
|
|
915
982
|
}
|
|
@@ -946,7 +1013,6 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
|
|
|
946
1013
|
table_reader->NewRangeTombstoneIterator(ro));
|
|
947
1014
|
// We may need to adjust these key bounds, depending on whether any range
|
|
948
1015
|
// deletion tombstones extend past them.
|
|
949
|
-
const Comparator* ucmp = cfd_->user_comparator();
|
|
950
1016
|
if (range_del_iter != nullptr) {
|
|
951
1017
|
for (range_del_iter->SeekToFirst(); range_del_iter->Valid();
|
|
952
1018
|
range_del_iter->Next()) {
|
|
@@ -962,24 +1028,13 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
|
|
|
962
1028
|
"number.");
|
|
963
1029
|
}
|
|
964
1030
|
RangeTombstone tombstone(key, range_del_iter->value());
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
sstableKeyCompare(ucmp, start_key,
|
|
969
|
-
file_to_ingest->smallest_internal_key) < 0) {
|
|
970
|
-
file_to_ingest->smallest_internal_key = start_key;
|
|
971
|
-
}
|
|
972
|
-
InternalKey end_key = tombstone.SerializeEndKey();
|
|
973
|
-
if (!bounds_set ||
|
|
974
|
-
sstableKeyCompare(ucmp, end_key,
|
|
975
|
-
file_to_ingest->largest_internal_key) > 0) {
|
|
976
|
-
file_to_ingest->largest_internal_key = end_key;
|
|
977
|
-
}
|
|
978
|
-
bounds_set = true;
|
|
1031
|
+
file_range_checker_.MaybeUpdateRange(tombstone.SerializeKey(),
|
|
1032
|
+
tombstone.SerializeEndKey(),
|
|
1033
|
+
file_to_ingest);
|
|
979
1034
|
}
|
|
980
1035
|
}
|
|
981
1036
|
|
|
982
|
-
const size_t ts_sz =
|
|
1037
|
+
const size_t ts_sz = ucmp_->timestamp_size();
|
|
983
1038
|
Slice smallest = file_to_ingest->smallest_internal_key.user_key();
|
|
984
1039
|
Slice largest = file_to_ingest->largest_internal_key.user_key();
|
|
985
1040
|
if (ts_sz > 0) {
|
|
@@ -1008,16 +1063,19 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
|
|
|
1008
1063
|
Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile(
|
|
1009
1064
|
SuperVersion* sv, bool force_global_seqno, CompactionStyle compaction_style,
|
|
1010
1065
|
SequenceNumber last_seqno, IngestedFileInfo* file_to_ingest,
|
|
1011
|
-
SequenceNumber* assigned_seqno
|
|
1066
|
+
SequenceNumber* assigned_seqno,
|
|
1067
|
+
std::optional<int> prev_batch_uppermost_level) {
|
|
1012
1068
|
Status status;
|
|
1013
1069
|
*assigned_seqno = 0;
|
|
1014
|
-
|
|
1015
|
-
|
|
1070
|
+
const size_t ts_sz = ucmp_->timestamp_size();
|
|
1071
|
+
assert(!prev_batch_uppermost_level.has_value() ||
|
|
1072
|
+
prev_batch_uppermost_level.value() < cfd_->NumberLevels());
|
|
1073
|
+
bool must_assign_to_l0 = prev_batch_uppermost_level.has_value() &&
|
|
1074
|
+
prev_batch_uppermost_level.value() == 0;
|
|
1016
1075
|
if (force_global_seqno || files_overlap_ ||
|
|
1017
|
-
compaction_style == kCompactionStyleFIFO) {
|
|
1076
|
+
compaction_style == kCompactionStyleFIFO || must_assign_to_l0) {
|
|
1018
1077
|
*assigned_seqno = last_seqno + 1;
|
|
1019
|
-
|
|
1020
|
-
if (files_overlap_ || compaction_style == kCompactionStyleFIFO) {
|
|
1078
|
+
if (compaction_style == kCompactionStyleFIFO || must_assign_to_l0) {
|
|
1021
1079
|
assert(ts_sz == 0);
|
|
1022
1080
|
file_to_ingest->picked_level = 0;
|
|
1023
1081
|
if (ingestion_options_.fail_if_not_bottommost_level &&
|
|
@@ -1034,11 +1092,16 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile(
|
|
|
1034
1092
|
Arena arena;
|
|
1035
1093
|
// TODO: plumb Env::IOActivity, Env::IOPriority
|
|
1036
1094
|
ReadOptions ro;
|
|
1095
|
+
ro.fill_cache = ingestion_options_.fill_cache;
|
|
1037
1096
|
ro.total_order_seek = true;
|
|
1038
1097
|
int target_level = 0;
|
|
1039
1098
|
auto* vstorage = cfd_->current()->storage_info();
|
|
1099
|
+
assert(!must_assign_to_l0);
|
|
1100
|
+
int exclusive_end_level = prev_batch_uppermost_level.has_value()
|
|
1101
|
+
? prev_batch_uppermost_level.value()
|
|
1102
|
+
: cfd_->NumberLevels();
|
|
1040
1103
|
|
|
1041
|
-
for (int lvl = 0; lvl <
|
|
1104
|
+
for (int lvl = 0; lvl < exclusive_end_level; lvl++) {
|
|
1042
1105
|
if (lvl > 0 && lvl < vstorage->base_level()) {
|
|
1043
1106
|
continue;
|
|
1044
1107
|
}
|
|
@@ -1065,8 +1128,6 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile(
|
|
|
1065
1128
|
overlap_with_db = true;
|
|
1066
1129
|
break;
|
|
1067
1130
|
}
|
|
1068
|
-
} else if (compaction_style == kCompactionStyleUniversal) {
|
|
1069
|
-
continue;
|
|
1070
1131
|
}
|
|
1071
1132
|
|
|
1072
1133
|
// We don't overlap with any keys in this level, but we still need to check
|
|
@@ -25,13 +25,74 @@ namespace ROCKSDB_NAMESPACE {
|
|
|
25
25
|
class Directories;
|
|
26
26
|
class SystemClock;
|
|
27
27
|
|
|
28
|
-
struct
|
|
29
|
-
//
|
|
30
|
-
std::string external_file_path;
|
|
31
|
-
// Smallest internal key in external file
|
|
28
|
+
struct KeyRangeInfo {
|
|
29
|
+
// Smallest internal key in an external file or for a batch of external files.
|
|
32
30
|
InternalKey smallest_internal_key;
|
|
33
|
-
// Largest internal key in external file
|
|
31
|
+
// Largest internal key in an external file or for a batch of external files.
|
|
34
32
|
InternalKey largest_internal_key;
|
|
33
|
+
|
|
34
|
+
bool empty() const {
|
|
35
|
+
return smallest_internal_key.size() == 0 &&
|
|
36
|
+
largest_internal_key.size() == 0;
|
|
37
|
+
}
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
// Helper class to apply SST file key range checks to the external files.
|
|
41
|
+
class ExternalFileRangeChecker {
|
|
42
|
+
public:
|
|
43
|
+
explicit ExternalFileRangeChecker(const Comparator* ucmp) : ucmp_(ucmp) {}
|
|
44
|
+
|
|
45
|
+
// Operator used for sorting ranges.
|
|
46
|
+
bool operator()(const KeyRangeInfo* prev_range,
|
|
47
|
+
const KeyRangeInfo* range) const {
|
|
48
|
+
assert(prev_range);
|
|
49
|
+
assert(range);
|
|
50
|
+
return sstableKeyCompare(ucmp_, prev_range->smallest_internal_key,
|
|
51
|
+
range->smallest_internal_key) < 0;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// Check whether `range` overlaps with `prev_range`. `ranges_sorted` can be
|
|
55
|
+
// set to true when the inputs are already sorted based on the sorting logic
|
|
56
|
+
// provided by this checker's operator(), which can help simplify the check.
|
|
57
|
+
bool OverlapsWithPrev(const KeyRangeInfo* prev_range,
|
|
58
|
+
const KeyRangeInfo* range,
|
|
59
|
+
bool ranges_sorted = false) const {
|
|
60
|
+
assert(prev_range);
|
|
61
|
+
assert(range);
|
|
62
|
+
if (prev_range->empty() || range->empty()) {
|
|
63
|
+
return false;
|
|
64
|
+
}
|
|
65
|
+
if (ranges_sorted) {
|
|
66
|
+
return sstableKeyCompare(ucmp_, prev_range->largest_internal_key,
|
|
67
|
+
range->smallest_internal_key) >= 0;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
return sstableKeyCompare(ucmp_, prev_range->largest_internal_key,
|
|
71
|
+
range->smallest_internal_key) >= 0 &&
|
|
72
|
+
sstableKeyCompare(ucmp_, prev_range->smallest_internal_key,
|
|
73
|
+
range->largest_internal_key) <= 0;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
void MaybeUpdateRange(const InternalKey& start_key,
|
|
77
|
+
const InternalKey& end_key, KeyRangeInfo* range) const {
|
|
78
|
+
assert(range);
|
|
79
|
+
if (range->smallest_internal_key.size() == 0 ||
|
|
80
|
+
sstableKeyCompare(ucmp_, start_key, range->smallest_internal_key) < 0) {
|
|
81
|
+
range->smallest_internal_key = start_key;
|
|
82
|
+
}
|
|
83
|
+
if (range->largest_internal_key.size() == 0 ||
|
|
84
|
+
sstableKeyCompare(ucmp_, end_key, range->largest_internal_key) > 0) {
|
|
85
|
+
range->largest_internal_key = end_key;
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
private:
|
|
90
|
+
const Comparator* ucmp_;
|
|
91
|
+
};
|
|
92
|
+
|
|
93
|
+
struct IngestedFileInfo : public KeyRangeInfo {
|
|
94
|
+
// External file path
|
|
95
|
+
std::string external_file_path;
|
|
35
96
|
// NOTE: use below two fields for all `*Overlap*` types of checks instead of
|
|
36
97
|
// smallest_internal_key.user_key() and largest_internal_key.user_key().
|
|
37
98
|
// The smallest / largest user key contained in the file for key range checks.
|
|
@@ -94,6 +155,30 @@ struct IngestedFileInfo {
|
|
|
94
155
|
bool user_defined_timestamps_persisted = true;
|
|
95
156
|
};
|
|
96
157
|
|
|
158
|
+
// A batch of files.
|
|
159
|
+
struct FileBatchInfo : public KeyRangeInfo {
|
|
160
|
+
autovector<IngestedFileInfo*> files;
|
|
161
|
+
// When true, `smallest_internal_key` and `largest_internal_key` will be
|
|
162
|
+
// tracked and updated as new file get added via `AddFile`. When false, we
|
|
163
|
+
// bypass this tracking. This is used when the all input external files
|
|
164
|
+
// are already checked and not overlapping, and they just need to be added
|
|
165
|
+
// into one default batch.
|
|
166
|
+
bool track_batch_range;
|
|
167
|
+
|
|
168
|
+
void AddFile(IngestedFileInfo* file,
|
|
169
|
+
const ExternalFileRangeChecker& key_range_checker) {
|
|
170
|
+
assert(file);
|
|
171
|
+
files.push_back(file);
|
|
172
|
+
if (track_batch_range) {
|
|
173
|
+
key_range_checker.MaybeUpdateRange(file->smallest_internal_key,
|
|
174
|
+
file->largest_internal_key, this);
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
explicit FileBatchInfo(bool _track_batch_range)
|
|
179
|
+
: track_batch_range(_track_batch_range) {}
|
|
180
|
+
};
|
|
181
|
+
|
|
97
182
|
class ExternalSstFileIngestionJob {
|
|
98
183
|
public:
|
|
99
184
|
ExternalSstFileIngestionJob(
|
|
@@ -108,6 +193,8 @@ class ExternalSstFileIngestionJob {
|
|
|
108
193
|
fs_(db_options.fs, io_tracer),
|
|
109
194
|
versions_(versions),
|
|
110
195
|
cfd_(cfd),
|
|
196
|
+
ucmp_(cfd ? cfd->user_comparator() : nullptr),
|
|
197
|
+
file_range_checker_(ucmp_),
|
|
111
198
|
db_options_(db_options),
|
|
112
199
|
mutable_db_options_(mutable_db_options),
|
|
113
200
|
env_options_(env_options),
|
|
@@ -119,10 +206,14 @@ class ExternalSstFileIngestionJob {
|
|
|
119
206
|
consumed_seqno_count_(0),
|
|
120
207
|
io_tracer_(io_tracer) {
|
|
121
208
|
assert(directories != nullptr);
|
|
209
|
+
assert(cfd_);
|
|
210
|
+
assert(ucmp_);
|
|
122
211
|
}
|
|
123
212
|
|
|
124
213
|
~ExternalSstFileIngestionJob() { UnregisterRange(); }
|
|
125
214
|
|
|
215
|
+
ColumnFamilyData* GetColumnFamilyData() const { return cfd_; }
|
|
216
|
+
|
|
126
217
|
// Prepare the job by copying external files into the DB.
|
|
127
218
|
Status Prepare(const std::vector<std::string>& external_files_paths,
|
|
128
219
|
const std::vector<std::string>& files_checksums,
|
|
@@ -140,6 +231,8 @@ class ExternalSstFileIngestionJob {
|
|
|
140
231
|
// Thread-safe
|
|
141
232
|
Status NeedsFlush(bool* flush_needed, SuperVersion* super_version);
|
|
142
233
|
|
|
234
|
+
void SetFlushedBeforeRun() { flushed_before_run_ = true; }
|
|
235
|
+
|
|
143
236
|
// Will execute the ingestion job and prepare edit() to be applied.
|
|
144
237
|
// REQUIRES: Mutex held
|
|
145
238
|
Status Run();
|
|
@@ -194,15 +287,38 @@ class ExternalSstFileIngestionJob {
|
|
|
194
287
|
IngestedFileInfo* file_to_ingest,
|
|
195
288
|
SuperVersion* sv);
|
|
196
289
|
|
|
290
|
+
// If the input files' key range overlaps themselves, this function divides
|
|
291
|
+
// them in the user specified order into multiple batches. Where the files
|
|
292
|
+
// within a batch do not overlap with each other, but key range could overlap
|
|
293
|
+
// between batches.
|
|
294
|
+
// If the input files' key range don't overlap themselves, they always just
|
|
295
|
+
// make one batch.
|
|
296
|
+
void DivideInputFilesIntoBatches();
|
|
297
|
+
|
|
298
|
+
// Assign level for the files in one batch. The files within one batch are not
|
|
299
|
+
// overlapping, and we assign level to each file one after another.
|
|
300
|
+
// If `prev_batch_uppermost_level` is specified, all files in this batch will
|
|
301
|
+
// be assigned to levels that are higher than `prev_batch_uppermost_level`.
|
|
302
|
+
// The uppermost level used by this batch of files is tracked too, so that it
|
|
303
|
+
// can be used by the next batch.
|
|
304
|
+
// REQUIRES: Mutex held
|
|
305
|
+
Status AssignLevelsForOneBatch(FileBatchInfo& batch,
|
|
306
|
+
SuperVersion* super_version,
|
|
307
|
+
bool force_global_seqno,
|
|
308
|
+
SequenceNumber* last_seqno,
|
|
309
|
+
int* batch_uppermost_level,
|
|
310
|
+
std::optional<int> prev_batch_uppermost_level);
|
|
311
|
+
|
|
197
312
|
// Assign `file_to_ingest` the appropriate sequence number and the lowest
|
|
198
313
|
// possible level that it can be ingested to according to compaction_style.
|
|
314
|
+
// If `prev_batch_uppermost_level` is specified, the file will only be
|
|
315
|
+
// assigned to levels tha are higher than `prev_batch_uppermost_level`.
|
|
199
316
|
// REQUIRES: Mutex held
|
|
200
|
-
Status AssignLevelAndSeqnoForIngestedFile(
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
SequenceNumber* assigned_seqno);
|
|
317
|
+
Status AssignLevelAndSeqnoForIngestedFile(
|
|
318
|
+
SuperVersion* sv, bool force_global_seqno,
|
|
319
|
+
CompactionStyle compaction_style, SequenceNumber last_seqno,
|
|
320
|
+
IngestedFileInfo* file_to_ingest, SequenceNumber* assigned_seqno,
|
|
321
|
+
std::optional<int> prev_batch_uppermost_level);
|
|
206
322
|
|
|
207
323
|
// File that we want to ingest behind always goes to the lowest level;
|
|
208
324
|
// we just check that it fits in the level, that DB allows ingest_behind,
|
|
@@ -237,11 +353,14 @@ class ExternalSstFileIngestionJob {
|
|
|
237
353
|
FileSystemPtr fs_;
|
|
238
354
|
VersionSet* versions_;
|
|
239
355
|
ColumnFamilyData* cfd_;
|
|
356
|
+
const Comparator* ucmp_;
|
|
357
|
+
ExternalFileRangeChecker file_range_checker_;
|
|
240
358
|
const ImmutableDBOptions& db_options_;
|
|
241
359
|
const MutableDBOptions& mutable_db_options_;
|
|
242
360
|
const EnvOptions& env_options_;
|
|
243
361
|
SnapshotList* db_snapshots_;
|
|
244
362
|
autovector<IngestedFileInfo> files_to_ingest_;
|
|
363
|
+
std::vector<FileBatchInfo> file_batches_to_ingest_;
|
|
245
364
|
const IngestExternalFileOptions& ingestion_options_;
|
|
246
365
|
Directories* directories_;
|
|
247
366
|
EventLogger* event_logger_;
|
|
@@ -256,6 +375,10 @@ class ExternalSstFileIngestionJob {
|
|
|
256
375
|
bool need_generate_file_checksum_{true};
|
|
257
376
|
std::shared_ptr<IOTracer> io_tracer_;
|
|
258
377
|
|
|
378
|
+
// Flag indicating whether the column family is flushed after `Prepare` and
|
|
379
|
+
// before `Run`.
|
|
380
|
+
bool flushed_before_run_{false};
|
|
381
|
+
|
|
259
382
|
// Below are variables used in (un)registering range for this ingestion job
|
|
260
383
|
//
|
|
261
384
|
// FileMetaData used in inputs of compactions equivalent to this ingestion
|