@nxtedition/rocksdb 8.1.4 → 8.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/deps/rocksdb/rocksdb/CMakeLists.txt +21 -0
- package/deps/rocksdb/rocksdb/Makefile +15 -3
- package/deps/rocksdb/rocksdb/TARGETS +6 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +32 -35
- package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +0 -30
- package/deps/rocksdb/rocksdb/cache/cache_entry_roles.h +0 -83
- package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +13 -14
- package/deps/rocksdb/rocksdb/cache/cache_helpers.cc +40 -0
- package/deps/rocksdb/rocksdb/cache/cache_helpers.h +14 -20
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +8 -9
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +5 -4
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +124 -156
- package/deps/rocksdb/rocksdb/cache/charged_cache.cc +10 -26
- package/deps/rocksdb/rocksdb/cache/charged_cache.h +11 -16
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +35 -32
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +19 -21
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +42 -30
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +9 -8
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +91 -143
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +54 -60
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +37 -63
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +120 -106
- package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +14 -5
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +16 -31
- package/deps/rocksdb/rocksdb/cache/typed_cache.h +339 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_contents.cc +0 -48
- package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +18 -15
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +5 -26
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +7 -8
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +6 -3
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +2 -7
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +19 -47
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +13 -5
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +15 -22
- package/deps/rocksdb/rocksdb/db/builder.cc +24 -10
- package/deps/rocksdb/rocksdb/db/builder.h +2 -1
- package/deps/rocksdb/rocksdb/db/c.cc +15 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +3 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +11 -6
- package/deps/rocksdb/rocksdb/db/column_family.h +20 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +31 -34
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +3 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +21 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +4 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +4 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +9 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +275 -82
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -18
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +17 -16
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +19 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +5 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +22 -22
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +5 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +81 -52
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +5 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +8 -2
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +3 -0
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +266 -138
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +86 -1
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +98 -9
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +28 -28
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +2 -3
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +1022 -123
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +65 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +32 -21
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +32 -24
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +199 -77
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +3 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +8 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +43 -23
- package/deps/rocksdb/rocksdb/db/db_iter.cc +8 -2
- package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +42 -0
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +155 -0
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +12 -12
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +230 -2
- package/deps/rocksdb/rocksdb/db/db_test.cc +3 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +233 -8
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +11 -10
- package/deps/rocksdb/rocksdb/db/db_test_util.h +39 -24
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +129 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +28 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +21 -0
- package/deps/rocksdb/rocksdb/db/dbformat.cc +25 -0
- package/deps/rocksdb/rocksdb/db/dbformat.h +2 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +3 -2
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +3 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +92 -13
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +38 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +14 -110
- package/deps/rocksdb/rocksdb/db/flush_job.cc +12 -10
- package/deps/rocksdb/rocksdb/db/flush_job.h +3 -2
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +29 -29
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +56 -53
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +3 -4
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +11 -11
- package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -2
- package/deps/rocksdb/rocksdb/db/log_reader.cc +8 -6
- package/deps/rocksdb/rocksdb/db/log_test.cc +35 -2
- package/deps/rocksdb/rocksdb/db/memtable.cc +31 -6
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +47 -29
- package/deps/rocksdb/rocksdb/db/merge_helper.h +14 -6
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +10 -10
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/repair.cc +65 -22
- package/deps/rocksdb/rocksdb/db/repair_test.cc +54 -0
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +26 -26
- package/deps/rocksdb/rocksdb/db/table_cache.cc +41 -91
- package/deps/rocksdb/rocksdb/db/table_cache.h +17 -19
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -9
- package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -1
- package/deps/rocksdb/rocksdb/db/version_builder.cc +102 -52
- package/deps/rocksdb/rocksdb/db/version_builder.h +20 -0
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +218 -93
- package/deps/rocksdb/rocksdb/db/version_edit.cc +27 -1
- package/deps/rocksdb/rocksdb/db/version_edit.h +34 -9
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +13 -6
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +17 -6
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +19 -17
- package/deps/rocksdb/rocksdb/db/version_set.cc +160 -28
- package/deps/rocksdb/rocksdb/db/version_set.h +34 -4
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -1
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +65 -31
- package/deps/rocksdb/rocksdb/db/write_batch.cc +4 -1
- package/deps/rocksdb/rocksdb/db/write_thread.cc +5 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -32
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.h +2 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +8 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +11 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +16 -15
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +13 -1
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -0
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +286 -217
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +137 -135
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +7 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +21 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +9 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +8 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +69 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
- package/deps/rocksdb/rocksdb/memory/arena.cc +23 -87
- package/deps/rocksdb/rocksdb/memory/arena.h +25 -31
- package/deps/rocksdb/rocksdb/memory/arena_test.cc +90 -0
- package/deps/rocksdb/rocksdb/memory/memory_allocator.h +9 -0
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +26 -26
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -3
- package/deps/rocksdb/rocksdb/port/mmap.cc +98 -0
- package/deps/rocksdb/rocksdb/port/mmap.h +70 -0
- package/deps/rocksdb/rocksdb/port/port_posix.h +2 -0
- package/{prebuilds → deps/rocksdb/rocksdb/prebuilds}/linux-x64/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/src.mk +3 -0
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -2
- package/deps/rocksdb/rocksdb/table/block_based/block.h +3 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +25 -67
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +3 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +18 -13
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +159 -225
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +31 -50
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +52 -20
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +3 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +96 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +132 -0
- package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +28 -0
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -5
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +1 -4
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +6 -7
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +3 -1
- package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +6 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +19 -18
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +9 -5
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +3 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +2 -2
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -3
- package/deps/rocksdb/rocksdb/table/format.cc +24 -20
- package/deps/rocksdb/rocksdb/table/format.h +6 -3
- package/deps/rocksdb/rocksdb/table/get_context.cc +12 -3
- package/deps/rocksdb/rocksdb/table/internal_iterator.h +0 -2
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +69 -35
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +2 -2
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
- package/deps/rocksdb/rocksdb/table/table_test.cc +7 -6
- package/deps/rocksdb/rocksdb/test_util/testutil.h +10 -0
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +66 -1
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +9 -2
- package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +5 -0
- package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +2 -2
- package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +1 -1
- package/deps/rocksdb/rocksdb/util/async_file_reader.cc +20 -12
- package/deps/rocksdb/rocksdb/util/bloom_test.cc +1 -1
- package/deps/rocksdb/rocksdb/util/compression.cc +2 -2
- package/deps/rocksdb/rocksdb/util/compression.h +11 -2
- package/deps/rocksdb/rocksdb/util/status.cc +7 -0
- package/deps/rocksdb/rocksdb/util/xxhash.h +1901 -887
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +250 -74
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +199 -4
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +35 -57
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +4 -5
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +39 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +9 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +11 -6
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +6 -5
- package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +0 -1
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +10 -11
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +31 -31
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +4 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +111 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +12 -3
- package/package.json +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +0 -182
|
@@ -569,12 +569,7 @@ Status BlobFileReader::UncompressBlobIfNeeded(
|
|
|
569
569
|
assert(result);
|
|
570
570
|
|
|
571
571
|
if (compression_type == kNoCompression) {
|
|
572
|
-
|
|
573
|
-
AllocateBlock(value_slice.size(), allocator);
|
|
574
|
-
memcpy(allocation.get(), value_slice.data(), value_slice.size());
|
|
575
|
-
|
|
576
|
-
*result = BlobContents::Create(std::move(allocation), value_slice.size());
|
|
577
|
-
|
|
572
|
+
BlobContentsCreator::Create(result, nullptr, value_slice, allocator);
|
|
578
573
|
return Status::OK();
|
|
579
574
|
}
|
|
580
575
|
|
|
@@ -602,7 +597,7 @@ Status BlobFileReader::UncompressBlobIfNeeded(
|
|
|
602
597
|
return Status::Corruption("Unable to uncompress blob");
|
|
603
598
|
}
|
|
604
599
|
|
|
605
|
-
|
|
600
|
+
result->reset(new BlobContents(std::move(output), uncompressed_size));
|
|
606
601
|
|
|
607
602
|
return Status::OK();
|
|
608
603
|
}
|
|
@@ -36,8 +36,8 @@ BlobSource::BlobSource(const ImmutableOptions* immutable_options,
|
|
|
36
36
|
if (bbto &&
|
|
37
37
|
bbto->cache_usage_options.options_overrides.at(CacheEntryRole::kBlobCache)
|
|
38
38
|
.charged == CacheEntryRoleOptions::Decision::kEnabled) {
|
|
39
|
-
blob_cache_ = std::make_shared<ChargedCache>(
|
|
40
|
-
|
|
39
|
+
blob_cache_ = SharedCacheInterface{std::make_shared<ChargedCache>(
|
|
40
|
+
immutable_options->blob_cache, bbto->block_cache)};
|
|
41
41
|
}
|
|
42
42
|
#endif // ROCKSDB_LITE
|
|
43
43
|
}
|
|
@@ -82,9 +82,8 @@ Status BlobSource::PutBlobIntoCache(
|
|
|
82
82
|
assert(cached_blob);
|
|
83
83
|
assert(cached_blob->IsEmpty());
|
|
84
84
|
|
|
85
|
-
|
|
85
|
+
TypedHandle* cache_handle = nullptr;
|
|
86
86
|
const Status s = InsertEntryIntoCache(cache_key, blob->get(),
|
|
87
|
-
(*blob)->ApproximateMemoryUsage(),
|
|
88
87
|
&cache_handle, Cache::Priority::BOTTOM);
|
|
89
88
|
if (s.ok()) {
|
|
90
89
|
blob->release();
|
|
@@ -106,26 +105,10 @@ Status BlobSource::PutBlobIntoCache(
|
|
|
106
105
|
return s;
|
|
107
106
|
}
|
|
108
107
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
Cache::CreateCallback create_cb =
|
|
114
|
-
[allocator = blob_cache_->memory_allocator()](
|
|
115
|
-
const void* buf, size_t size, void** out_obj,
|
|
116
|
-
size_t* charge) -> Status {
|
|
117
|
-
return BlobContents::CreateCallback(AllocateBlock(size, allocator), buf,
|
|
118
|
-
size, out_obj, charge);
|
|
119
|
-
};
|
|
120
|
-
|
|
121
|
-
cache_handle = blob_cache_->Lookup(key, BlobContents::GetCacheItemHelper(),
|
|
122
|
-
create_cb, Cache::Priority::BOTTOM,
|
|
123
|
-
true /* wait_for_cache */, statistics_);
|
|
124
|
-
} else {
|
|
125
|
-
cache_handle = blob_cache_->Lookup(key, statistics_);
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
return cache_handle;
|
|
108
|
+
BlobSource::TypedHandle* BlobSource::GetEntryFromCache(const Slice& key) const {
|
|
109
|
+
return blob_cache_.LookupFull(
|
|
110
|
+
key, nullptr /* context */, Cache::Priority::BOTTOM,
|
|
111
|
+
true /* wait_for_cache */, statistics_, lowest_used_cache_tier_);
|
|
129
112
|
}
|
|
130
113
|
|
|
131
114
|
void BlobSource::PinCachedBlob(CacheHandleGuard<BlobContents>* cached_blob,
|
|
@@ -166,24 +149,11 @@ void BlobSource::PinOwnedBlob(std::unique_ptr<BlobContents>* owned_blob,
|
|
|
166
149
|
}
|
|
167
150
|
|
|
168
151
|
Status BlobSource::InsertEntryIntoCache(const Slice& key, BlobContents* value,
|
|
169
|
-
|
|
170
|
-
Cache::Handle** cache_handle,
|
|
152
|
+
TypedHandle** cache_handle,
|
|
171
153
|
Cache::Priority priority) const {
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
BlobContents::GetCacheItemHelper();
|
|
176
|
-
assert(cache_item_helper);
|
|
177
|
-
|
|
178
|
-
if (lowest_used_cache_tier_ == CacheTier::kNonVolatileBlockTier) {
|
|
179
|
-
s = blob_cache_->Insert(key, value, cache_item_helper, charge, cache_handle,
|
|
180
|
-
priority);
|
|
181
|
-
} else {
|
|
182
|
-
s = blob_cache_->Insert(key, value, charge, cache_item_helper->del_cb,
|
|
183
|
-
cache_handle, priority);
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
return s;
|
|
154
|
+
return blob_cache_.InsertFull(key, value, value->ApproximateMemoryUsage(),
|
|
155
|
+
cache_handle, priority,
|
|
156
|
+
lowest_used_cache_tier_);
|
|
187
157
|
}
|
|
188
158
|
|
|
189
159
|
Status BlobSource::GetBlob(const ReadOptions& read_options,
|
|
@@ -252,9 +222,10 @@ Status BlobSource::GetBlob(const ReadOptions& read_options,
|
|
|
252
222
|
return Status::Corruption("Compression type mismatch when reading blob");
|
|
253
223
|
}
|
|
254
224
|
|
|
255
|
-
MemoryAllocator* const allocator =
|
|
256
|
-
|
|
257
|
-
|
|
225
|
+
MemoryAllocator* const allocator =
|
|
226
|
+
(blob_cache_ && read_options.fill_cache)
|
|
227
|
+
? blob_cache_.get()->memory_allocator()
|
|
228
|
+
: nullptr;
|
|
258
229
|
|
|
259
230
|
uint64_t read_size = 0;
|
|
260
231
|
s = blob_file_reader.GetValue()->GetBlob(
|
|
@@ -418,9 +389,10 @@ void BlobSource::MultiGetBlobFromOneFile(const ReadOptions& read_options,
|
|
|
418
389
|
|
|
419
390
|
assert(blob_file_reader.GetValue());
|
|
420
391
|
|
|
421
|
-
MemoryAllocator* const allocator =
|
|
422
|
-
|
|
423
|
-
|
|
392
|
+
MemoryAllocator* const allocator =
|
|
393
|
+
(blob_cache_ && read_options.fill_cache)
|
|
394
|
+
? blob_cache_.get()->memory_allocator()
|
|
395
|
+
: nullptr;
|
|
424
396
|
|
|
425
397
|
blob_file_reader.GetValue()->MultiGetBlob(read_options, allocator,
|
|
426
398
|
_blob_reqs, &_bytes_read);
|
|
@@ -8,8 +8,9 @@
|
|
|
8
8
|
#include <cinttypes>
|
|
9
9
|
#include <memory>
|
|
10
10
|
|
|
11
|
-
#include "cache/cache_helpers.h"
|
|
12
11
|
#include "cache/cache_key.h"
|
|
12
|
+
#include "cache/typed_cache.h"
|
|
13
|
+
#include "db/blob/blob_contents.h"
|
|
13
14
|
#include "db/blob/blob_file_cache.h"
|
|
14
15
|
#include "db/blob/blob_read_request.h"
|
|
15
16
|
#include "rocksdb/cache.h"
|
|
@@ -23,7 +24,6 @@ struct ImmutableOptions;
|
|
|
23
24
|
class Status;
|
|
24
25
|
class FilePrefetchBuffer;
|
|
25
26
|
class Slice;
|
|
26
|
-
class BlobContents;
|
|
27
27
|
|
|
28
28
|
// BlobSource is a class that provides universal access to blobs, regardless of
|
|
29
29
|
// whether they are in the blob cache, secondary cache, or (remote) storage.
|
|
@@ -106,6 +106,14 @@ class BlobSource {
|
|
|
106
106
|
bool TEST_BlobInCache(uint64_t file_number, uint64_t file_size,
|
|
107
107
|
uint64_t offset, size_t* charge = nullptr) const;
|
|
108
108
|
|
|
109
|
+
// For TypedSharedCacheInterface
|
|
110
|
+
void Create(BlobContents** out, const char* buf, size_t size,
|
|
111
|
+
MemoryAllocator* alloc);
|
|
112
|
+
|
|
113
|
+
using SharedCacheInterface =
|
|
114
|
+
FullTypedSharedCacheInterface<BlobContents, BlobContentsCreator>;
|
|
115
|
+
using TypedHandle = SharedCacheInterface::TypedHandle;
|
|
116
|
+
|
|
109
117
|
private:
|
|
110
118
|
Status GetBlobFromCache(const Slice& cache_key,
|
|
111
119
|
CacheHandleGuard<BlobContents>* cached_blob) const;
|
|
@@ -120,10 +128,10 @@ class BlobSource {
|
|
|
120
128
|
static void PinOwnedBlob(std::unique_ptr<BlobContents>* owned_blob,
|
|
121
129
|
PinnableSlice* value);
|
|
122
130
|
|
|
123
|
-
|
|
131
|
+
TypedHandle* GetEntryFromCache(const Slice& key) const;
|
|
124
132
|
|
|
125
133
|
Status InsertEntryIntoCache(const Slice& key, BlobContents* value,
|
|
126
|
-
|
|
134
|
+
TypedHandle** cache_handle,
|
|
127
135
|
Cache::Priority priority) const;
|
|
128
136
|
|
|
129
137
|
inline CacheKey GetCacheKey(uint64_t file_number, uint64_t /*file_size*/,
|
|
@@ -141,7 +149,7 @@ class BlobSource {
|
|
|
141
149
|
BlobFileCache* blob_file_cache_;
|
|
142
150
|
|
|
143
151
|
// A cache to store uncompressed blobs.
|
|
144
|
-
|
|
152
|
+
mutable SharedCacheInterface blob_cache_;
|
|
145
153
|
|
|
146
154
|
// The control option of how the cache tiers will be used. Currently rocksdb
|
|
147
155
|
// support block/blob cache (volatile tier) and secondary cache (this tier
|
|
@@ -1150,15 +1150,6 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) {
|
|
|
1150
1150
|
auto blob_cache = options_.blob_cache;
|
|
1151
1151
|
auto secondary_cache = lru_cache_opts_.secondary_cache;
|
|
1152
1152
|
|
|
1153
|
-
Cache::CreateCallback create_cb = [](const void* buf, size_t size,
|
|
1154
|
-
void** out_obj,
|
|
1155
|
-
size_t* charge) -> Status {
|
|
1156
|
-
CacheAllocationPtr allocation(new char[size]);
|
|
1157
|
-
|
|
1158
|
-
return BlobContents::CreateCallback(std::move(allocation), buf, size,
|
|
1159
|
-
out_obj, charge);
|
|
1160
|
-
};
|
|
1161
|
-
|
|
1162
1153
|
{
|
|
1163
1154
|
// GetBlob
|
|
1164
1155
|
std::vector<PinnableSlice> values(keys.size());
|
|
@@ -1219,14 +1210,15 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) {
|
|
|
1219
1210
|
{
|
|
1220
1211
|
CacheKey cache_key = base_cache_key.WithOffset(blob_offsets[0]);
|
|
1221
1212
|
const Slice key0 = cache_key.AsSlice();
|
|
1222
|
-
auto handle0 = blob_cache->
|
|
1213
|
+
auto handle0 = blob_cache->BasicLookup(key0, statistics);
|
|
1223
1214
|
ASSERT_EQ(handle0, nullptr);
|
|
1224
1215
|
|
|
1225
1216
|
// key0's item should be in the secondary cache.
|
|
1226
1217
|
bool is_in_sec_cache = false;
|
|
1227
|
-
auto sec_handle0 =
|
|
1228
|
-
|
|
1229
|
-
|
|
1218
|
+
auto sec_handle0 = secondary_cache->Lookup(
|
|
1219
|
+
key0, &BlobSource::SharedCacheInterface::kFullHelper,
|
|
1220
|
+
/*context*/ nullptr, true,
|
|
1221
|
+
/*advise_erase=*/true, is_in_sec_cache);
|
|
1230
1222
|
ASSERT_FALSE(is_in_sec_cache);
|
|
1231
1223
|
ASSERT_NE(sec_handle0, nullptr);
|
|
1232
1224
|
ASSERT_TRUE(sec_handle0->IsReady());
|
|
@@ -1246,14 +1238,15 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) {
|
|
|
1246
1238
|
{
|
|
1247
1239
|
CacheKey cache_key = base_cache_key.WithOffset(blob_offsets[1]);
|
|
1248
1240
|
const Slice key1 = cache_key.AsSlice();
|
|
1249
|
-
auto handle1 = blob_cache->
|
|
1241
|
+
auto handle1 = blob_cache->BasicLookup(key1, statistics);
|
|
1250
1242
|
ASSERT_NE(handle1, nullptr);
|
|
1251
1243
|
blob_cache->Release(handle1);
|
|
1252
1244
|
|
|
1253
1245
|
bool is_in_sec_cache = false;
|
|
1254
|
-
auto sec_handle1 =
|
|
1255
|
-
|
|
1256
|
-
|
|
1246
|
+
auto sec_handle1 = secondary_cache->Lookup(
|
|
1247
|
+
key1, &BlobSource::SharedCacheInterface::kFullHelper,
|
|
1248
|
+
/*context*/ nullptr, true,
|
|
1249
|
+
/*advise_erase=*/true, is_in_sec_cache);
|
|
1257
1250
|
ASSERT_FALSE(is_in_sec_cache);
|
|
1258
1251
|
ASSERT_EQ(sec_handle1, nullptr);
|
|
1259
1252
|
|
|
@@ -1276,7 +1269,7 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) {
|
|
|
1276
1269
|
// key0 should be in the primary cache.
|
|
1277
1270
|
CacheKey cache_key0 = base_cache_key.WithOffset(blob_offsets[0]);
|
|
1278
1271
|
const Slice key0 = cache_key0.AsSlice();
|
|
1279
|
-
auto handle0 = blob_cache->
|
|
1272
|
+
auto handle0 = blob_cache->BasicLookup(key0, statistics);
|
|
1280
1273
|
ASSERT_NE(handle0, nullptr);
|
|
1281
1274
|
auto value = static_cast<BlobContents*>(blob_cache->Value(handle0));
|
|
1282
1275
|
ASSERT_NE(value, nullptr);
|
|
@@ -1286,12 +1279,12 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) {
|
|
|
1286
1279
|
// key1 is not in the primary cache and is in the secondary cache.
|
|
1287
1280
|
CacheKey cache_key1 = base_cache_key.WithOffset(blob_offsets[1]);
|
|
1288
1281
|
const Slice key1 = cache_key1.AsSlice();
|
|
1289
|
-
auto handle1 = blob_cache->
|
|
1282
|
+
auto handle1 = blob_cache->BasicLookup(key1, statistics);
|
|
1290
1283
|
ASSERT_EQ(handle1, nullptr);
|
|
1291
1284
|
|
|
1292
1285
|
// erase key0 from the primary cache.
|
|
1293
1286
|
blob_cache->Erase(key0);
|
|
1294
|
-
handle0 = blob_cache->
|
|
1287
|
+
handle0 = blob_cache->BasicLookup(key0, statistics);
|
|
1295
1288
|
ASSERT_EQ(handle0, nullptr);
|
|
1296
1289
|
|
|
1297
1290
|
// key1 promotion should succeed due to the primary cache being empty. we
|
|
@@ -1307,7 +1300,7 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) {
|
|
|
1307
1300
|
// in the secondary cache. So, the primary cache's Lookup() without
|
|
1308
1301
|
// secondary cache support cannot see it. (NOTE: The dummy handle used
|
|
1309
1302
|
// to be a leaky abstraction but not anymore.)
|
|
1310
|
-
handle1 = blob_cache->
|
|
1303
|
+
handle1 = blob_cache->BasicLookup(key1, statistics);
|
|
1311
1304
|
ASSERT_EQ(handle1, nullptr);
|
|
1312
1305
|
|
|
1313
1306
|
// But after another access, it is promoted to primary cache
|
|
@@ -1315,7 +1308,7 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) {
|
|
|
1315
1308
|
blob_offsets[1]));
|
|
1316
1309
|
|
|
1317
1310
|
// And Lookup() can find it (without secondary cache support)
|
|
1318
|
-
handle1 = blob_cache->
|
|
1311
|
+
handle1 = blob_cache->BasicLookup(key1, statistics);
|
|
1319
1312
|
ASSERT_NE(handle1, nullptr);
|
|
1320
1313
|
ASSERT_NE(blob_cache->Value(handle1), nullptr);
|
|
1321
1314
|
blob_cache->Release(handle1);
|
|
@@ -71,8 +71,9 @@ Status BuildTable(
|
|
|
71
71
|
int job_id, const Env::IOPriority io_priority,
|
|
72
72
|
TableProperties* table_properties, Env::WriteLifeTimeHint write_hint,
|
|
73
73
|
const std::string* full_history_ts_low,
|
|
74
|
-
BlobFileCompletionCallback* blob_callback,
|
|
75
|
-
uint64_t*
|
|
74
|
+
BlobFileCompletionCallback* blob_callback, Version* version,
|
|
75
|
+
uint64_t* num_input_entries, uint64_t* memtable_payload_bytes,
|
|
76
|
+
uint64_t* memtable_garbage_bytes) {
|
|
76
77
|
assert((tboptions.column_family_id ==
|
|
77
78
|
TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) ==
|
|
78
79
|
tboptions.column_family_name.empty());
|
|
@@ -175,10 +176,10 @@ Status BuildTable(
|
|
|
175
176
|
builder = NewTableBuilder(tboptions, file_writer.get());
|
|
176
177
|
}
|
|
177
178
|
|
|
179
|
+
auto ucmp = tboptions.internal_comparator.user_comparator();
|
|
178
180
|
MergeHelper merge(
|
|
179
|
-
env,
|
|
180
|
-
ioptions.
|
|
181
|
-
true /* internal key corruption is not ok */,
|
|
181
|
+
env, ucmp, ioptions.merge_operator.get(), compaction_filter.get(),
|
|
182
|
+
ioptions.logger, true /* internal key corruption is not ok */,
|
|
182
183
|
snapshots.empty() ? 0 : snapshots.back(), snapshot_checker);
|
|
183
184
|
|
|
184
185
|
std::unique_ptr<BlobFileBuilder> blob_file_builder(
|
|
@@ -196,9 +197,8 @@ Status BuildTable(
|
|
|
196
197
|
|
|
197
198
|
const std::atomic<bool> kManualCompactionCanceledFalse{false};
|
|
198
199
|
CompactionIterator c_iter(
|
|
199
|
-
iter,
|
|
200
|
-
|
|
201
|
-
job_snapshot, snapshot_checker, env,
|
|
200
|
+
iter, ucmp, &merge, kMaxSequenceNumber, &snapshots,
|
|
201
|
+
earliest_write_conflict_snapshot, job_snapshot, snapshot_checker, env,
|
|
202
202
|
ShouldReportDetailedTime(env, ioptions.stats),
|
|
203
203
|
true /* internal key corruption is not ok */, range_del_agg.get(),
|
|
204
204
|
blob_file_builder.get(), ioptions.allow_data_in_errors,
|
|
@@ -241,14 +241,28 @@ Status BuildTable(
|
|
|
241
241
|
|
|
242
242
|
if (s.ok()) {
|
|
243
243
|
auto range_del_it = range_del_agg->NewIterator();
|
|
244
|
+
Slice last_tombstone_start_user_key{};
|
|
244
245
|
for (range_del_it->SeekToFirst(); range_del_it->Valid();
|
|
245
246
|
range_del_it->Next()) {
|
|
246
247
|
auto tombstone = range_del_it->Tombstone();
|
|
247
248
|
auto kv = tombstone.Serialize();
|
|
248
249
|
builder->Add(kv.first.Encode(), kv.second);
|
|
249
|
-
|
|
250
|
-
|
|
250
|
+
InternalKey tombstone_end = tombstone.SerializeEndKey();
|
|
251
|
+
meta->UpdateBoundariesForRange(kv.first, tombstone_end, tombstone.seq_,
|
|
251
252
|
tboptions.internal_comparator);
|
|
253
|
+
if (version) {
|
|
254
|
+
if (last_tombstone_start_user_key.empty() ||
|
|
255
|
+
ucmp->CompareWithoutTimestamp(last_tombstone_start_user_key,
|
|
256
|
+
range_del_it->start_key()) < 0) {
|
|
257
|
+
SizeApproximationOptions approx_opts;
|
|
258
|
+
approx_opts.files_size_error_margin = 0.1;
|
|
259
|
+
meta->compensated_range_deletion_size += versions->ApproximateSize(
|
|
260
|
+
approx_opts, version, kv.first.Encode(), tombstone_end.Encode(),
|
|
261
|
+
0 /* start_level */, -1 /* end_level */,
|
|
262
|
+
TableReaderCaller::kFlush);
|
|
263
|
+
}
|
|
264
|
+
last_tombstone_start_user_key = range_del_it->start_key();
|
|
265
|
+
}
|
|
252
266
|
}
|
|
253
267
|
}
|
|
254
268
|
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
#include "db/range_tombstone_fragmenter.h"
|
|
14
14
|
#include "db/seqno_to_time_mapping.h"
|
|
15
15
|
#include "db/table_properties_collector.h"
|
|
16
|
+
#include "db/version_set.h"
|
|
16
17
|
#include "logging/event_logger.h"
|
|
17
18
|
#include "options/cf_options.h"
|
|
18
19
|
#include "rocksdb/comparator.h"
|
|
@@ -70,7 +71,7 @@ extern Status BuildTable(
|
|
|
70
71
|
Env::WriteLifeTimeHint write_hint = Env::WLTH_NOT_SET,
|
|
71
72
|
const std::string* full_history_ts_low = nullptr,
|
|
72
73
|
BlobFileCompletionCallback* blob_callback = nullptr,
|
|
73
|
-
uint64_t* num_input_entries = nullptr,
|
|
74
|
+
Version* version = nullptr, uint64_t* num_input_entries = nullptr,
|
|
74
75
|
uint64_t* memtable_payload_bytes = nullptr,
|
|
75
76
|
uint64_t* memtable_garbage_bytes = nullptr);
|
|
76
77
|
|
|
@@ -2588,6 +2588,12 @@ void rocksdb_block_based_options_set_partition_filters(
|
|
|
2588
2588
|
options->rep.partition_filters = partition_filters;
|
|
2589
2589
|
}
|
|
2590
2590
|
|
|
2591
|
+
void rocksdb_block_based_options_set_optimize_filters_for_memory(
|
|
2592
|
+
rocksdb_block_based_table_options_t* options,
|
|
2593
|
+
unsigned char optimize_filters_for_memory) {
|
|
2594
|
+
options->rep.optimize_filters_for_memory = optimize_filters_for_memory;
|
|
2595
|
+
}
|
|
2596
|
+
|
|
2591
2597
|
void rocksdb_block_based_options_set_use_delta_encoding(
|
|
2592
2598
|
rocksdb_block_based_table_options_t* options,
|
|
2593
2599
|
unsigned char use_delta_encoding) {
|
|
@@ -4443,6 +4449,15 @@ rocksdb_readoptions_get_io_timeout(rocksdb_readoptions_t* opt) {
|
|
|
4443
4449
|
return opt->rep.io_timeout.count();
|
|
4444
4450
|
}
|
|
4445
4451
|
|
|
4452
|
+
void rocksdb_readoptions_set_async_io(rocksdb_readoptions_t* opt,
|
|
4453
|
+
unsigned char v) {
|
|
4454
|
+
opt->rep.async_io = v;
|
|
4455
|
+
}
|
|
4456
|
+
|
|
4457
|
+
unsigned char rocksdb_readoptions_get_async_io(rocksdb_readoptions_t* opt) {
|
|
4458
|
+
return opt->rep.async_io;
|
|
4459
|
+
}
|
|
4460
|
+
|
|
4446
4461
|
void rocksdb_readoptions_set_timestamp(rocksdb_readoptions_t* opt,
|
|
4447
4462
|
const char* ts, size_t tslen) {
|
|
4448
4463
|
if (ts == nullptr) {
|
|
@@ -2572,6 +2572,9 @@ int main(int argc, char** argv) {
|
|
|
2572
2572
|
rocksdb_readoptions_set_io_timeout(ro, 400);
|
|
2573
2573
|
CheckCondition(400 == rocksdb_readoptions_get_io_timeout(ro));
|
|
2574
2574
|
|
|
2575
|
+
rocksdb_readoptions_set_async_io(ro, 1);
|
|
2576
|
+
CheckCondition(1 == rocksdb_readoptions_get_async_io(ro));
|
|
2577
|
+
|
|
2575
2578
|
rocksdb_readoptions_destroy(ro);
|
|
2576
2579
|
}
|
|
2577
2580
|
|
|
@@ -557,7 +557,6 @@ ColumnFamilyData::ColumnFamilyData(
|
|
|
557
557
|
next_(nullptr),
|
|
558
558
|
prev_(nullptr),
|
|
559
559
|
log_number_(0),
|
|
560
|
-
flush_reason_(FlushReason::kOthers),
|
|
561
560
|
column_family_set_(column_family_set),
|
|
562
561
|
queued_for_flush_(false),
|
|
563
562
|
queued_for_compaction_(false),
|
|
@@ -565,7 +564,8 @@ ColumnFamilyData::ColumnFamilyData(
|
|
|
565
564
|
allow_2pc_(db_options.allow_2pc),
|
|
566
565
|
last_memtable_id_(0),
|
|
567
566
|
db_paths_registered_(false),
|
|
568
|
-
mempurge_used_(false)
|
|
567
|
+
mempurge_used_(false),
|
|
568
|
+
next_epoch_number_(1) {
|
|
569
569
|
if (id_ != kDummyColumnFamilyDataId) {
|
|
570
570
|
// TODO(cc): RegisterDbPaths can be expensive, considering moving it
|
|
571
571
|
// outside of this constructor which might be called with db mutex held.
|
|
@@ -1128,12 +1128,9 @@ bool ColumnFamilyData::NeedsCompaction() const {
|
|
|
1128
1128
|
Compaction* ColumnFamilyData::PickCompaction(
|
|
1129
1129
|
const MutableCFOptions& mutable_options,
|
|
1130
1130
|
const MutableDBOptions& mutable_db_options, LogBuffer* log_buffer) {
|
|
1131
|
-
SequenceNumber earliest_mem_seqno =
|
|
1132
|
-
std::min(mem_->GetEarliestSequenceNumber(),
|
|
1133
|
-
imm_.current()->GetEarliestSequenceNumber(false));
|
|
1134
1131
|
auto* result = compaction_picker_->PickCompaction(
|
|
1135
1132
|
GetName(), mutable_options, mutable_db_options, current_->storage_info(),
|
|
1136
|
-
log_buffer
|
|
1133
|
+
log_buffer);
|
|
1137
1134
|
if (result != nullptr) {
|
|
1138
1135
|
result->SetInputVersion(current_);
|
|
1139
1136
|
}
|
|
@@ -1220,6 +1217,7 @@ Compaction* ColumnFamilyData::CompactRange(
|
|
|
1220
1217
|
if (result != nullptr) {
|
|
1221
1218
|
result->SetInputVersion(current_);
|
|
1222
1219
|
}
|
|
1220
|
+
TEST_SYNC_POINT("ColumnFamilyData::CompactRange:Return");
|
|
1223
1221
|
return result;
|
|
1224
1222
|
}
|
|
1225
1223
|
|
|
@@ -1520,6 +1518,13 @@ FSDirectory* ColumnFamilyData::GetDataDir(size_t path_id) const {
|
|
|
1520
1518
|
return data_dirs_[path_id].get();
|
|
1521
1519
|
}
|
|
1522
1520
|
|
|
1521
|
+
void ColumnFamilyData::RecoverEpochNumbers() {
|
|
1522
|
+
assert(current_);
|
|
1523
|
+
auto* vstorage = current_->storage_info();
|
|
1524
|
+
assert(vstorage);
|
|
1525
|
+
vstorage->RecoverEpochNumbers(this);
|
|
1526
|
+
}
|
|
1527
|
+
|
|
1523
1528
|
ColumnFamilySet::ColumnFamilySet(const std::string& dbname,
|
|
1524
1529
|
const ImmutableDBOptions* db_options,
|
|
1525
1530
|
const FileOptions& file_options,
|
|
@@ -310,10 +310,6 @@ class ColumnFamilyData {
|
|
|
310
310
|
void SetLogNumber(uint64_t log_number) { log_number_ = log_number; }
|
|
311
311
|
uint64_t GetLogNumber() const { return log_number_; }
|
|
312
312
|
|
|
313
|
-
void SetFlushReason(FlushReason flush_reason) {
|
|
314
|
-
flush_reason_ = flush_reason;
|
|
315
|
-
}
|
|
316
|
-
FlushReason GetFlushReason() const { return flush_reason_; }
|
|
317
313
|
// thread-safe
|
|
318
314
|
const FileOptions* soptions() const;
|
|
319
315
|
const ImmutableOptions* ioptions() const { return &ioptions_; }
|
|
@@ -533,6 +529,24 @@ class ColumnFamilyData {
|
|
|
533
529
|
void SetMempurgeUsed() { mempurge_used_ = true; }
|
|
534
530
|
bool GetMempurgeUsed() { return mempurge_used_; }
|
|
535
531
|
|
|
532
|
+
// Allocate and return a new epoch number
|
|
533
|
+
uint64_t NewEpochNumber() { return next_epoch_number_.fetch_add(1); }
|
|
534
|
+
|
|
535
|
+
// Get the next epoch number to be assigned
|
|
536
|
+
uint64_t GetNextEpochNumber() const { return next_epoch_number_.load(); }
|
|
537
|
+
|
|
538
|
+
// Set the next epoch number to be assigned
|
|
539
|
+
void SetNextEpochNumber(uint64_t next_epoch_number) {
|
|
540
|
+
next_epoch_number_.store(next_epoch_number);
|
|
541
|
+
}
|
|
542
|
+
|
|
543
|
+
// Reset the next epoch number to be assigned
|
|
544
|
+
void ResetNextEpochNumber() { next_epoch_number_.store(1); }
|
|
545
|
+
|
|
546
|
+
// Recover the next epoch number of this CF and epoch number
|
|
547
|
+
// of its files (if missing)
|
|
548
|
+
void RecoverEpochNumbers();
|
|
549
|
+
|
|
536
550
|
private:
|
|
537
551
|
friend class ColumnFamilySet;
|
|
538
552
|
ColumnFamilyData(uint32_t id, const std::string& name,
|
|
@@ -598,8 +612,6 @@ class ColumnFamilyData {
|
|
|
598
612
|
// recovered from
|
|
599
613
|
uint64_t log_number_;
|
|
600
614
|
|
|
601
|
-
std::atomic<FlushReason> flush_reason_;
|
|
602
|
-
|
|
603
615
|
// An object that keeps all the compaction stats
|
|
604
616
|
// and picks the next compaction
|
|
605
617
|
std::unique_ptr<CompactionPicker> compaction_picker_;
|
|
@@ -634,6 +646,8 @@ class ColumnFamilyData {
|
|
|
634
646
|
// a Version associated with this CFD
|
|
635
647
|
std::shared_ptr<CacheReservationManager> file_metadata_cache_res_mgr_;
|
|
636
648
|
bool mempurge_used_;
|
|
649
|
+
|
|
650
|
+
std::atomic<uint64_t> next_epoch_number_;
|
|
637
651
|
};
|
|
638
652
|
|
|
639
653
|
// ColumnFamilySet has interesting thread-safety requirements
|
|
@@ -238,12 +238,19 @@ Compaction::Compaction(
|
|
|
238
238
|
inputs_(PopulateWithAtomicBoundaries(vstorage, std::move(_inputs))),
|
|
239
239
|
grandparents_(std::move(_grandparents)),
|
|
240
240
|
score_(_score),
|
|
241
|
-
bottommost_level_(
|
|
241
|
+
bottommost_level_(
|
|
242
|
+
// For simplicity, we don't support the concept of "bottommost level"
|
|
243
|
+
// with
|
|
244
|
+
// `CompactionReason::kExternalSstIngestion` and
|
|
245
|
+
// `CompactionReason::kRefitLevel`
|
|
246
|
+
(_compaction_reason == CompactionReason::kExternalSstIngestion ||
|
|
247
|
+
_compaction_reason == CompactionReason::kRefitLevel)
|
|
248
|
+
? false
|
|
249
|
+
: IsBottommostLevel(output_level_, vstorage, inputs_)),
|
|
242
250
|
is_full_compaction_(IsFullCompaction(vstorage, inputs_)),
|
|
243
251
|
is_manual_compaction_(_manual_compaction),
|
|
244
252
|
trim_ts_(_trim_ts),
|
|
245
253
|
is_trivial_move_(false),
|
|
246
|
-
|
|
247
254
|
compaction_reason_(_compaction_reason),
|
|
248
255
|
notify_on_compaction_completion_(false),
|
|
249
256
|
enable_blob_garbage_collection_(
|
|
@@ -258,8 +265,15 @@ Compaction::Compaction(
|
|
|
258
265
|
_blob_garbage_collection_age_cutoff > 1
|
|
259
266
|
? mutable_cf_options()->blob_garbage_collection_age_cutoff
|
|
260
267
|
: _blob_garbage_collection_age_cutoff),
|
|
261
|
-
penultimate_level_(
|
|
262
|
-
|
|
268
|
+
penultimate_level_(
|
|
269
|
+
// For simplicity, we don't support the concept of "penultimate level"
|
|
270
|
+
// with `CompactionReason::kExternalSstIngestion` and
|
|
271
|
+
// `CompactionReason::kRefitLevel`
|
|
272
|
+
_compaction_reason == CompactionReason::kExternalSstIngestion ||
|
|
273
|
+
_compaction_reason == CompactionReason::kRefitLevel
|
|
274
|
+
? Compaction::kInvalidLevel
|
|
275
|
+
: EvaluatePenultimateLevel(vstorage, immutable_options_,
|
|
276
|
+
start_level_, output_level_)) {
|
|
263
277
|
MarkFilesBeingCompacted(true);
|
|
264
278
|
if (is_manual_compaction_) {
|
|
265
279
|
compaction_reason_ = CompactionReason::kManualCompaction;
|
|
@@ -332,6 +346,7 @@ void Compaction::PopulatePenultimateLevelOutputRange() {
|
|
|
332
346
|
// the case that the penultimate level is empty).
|
|
333
347
|
if (immutable_options_.compaction_style == kCompactionStyleUniversal) {
|
|
334
348
|
exclude_level = kInvalidLevel;
|
|
349
|
+
penultimate_output_range_type_ = PenultimateOutputRangeType::kFullRange;
|
|
335
350
|
std::set<uint64_t> penultimate_inputs;
|
|
336
351
|
for (const auto& input_lvl : inputs_) {
|
|
337
352
|
if (input_lvl.level == penultimate_level_) {
|
|
@@ -345,7 +360,8 @@ void Compaction::PopulatePenultimateLevelOutputRange() {
|
|
|
345
360
|
if (penultimate_inputs.find(file->fd.GetNumber()) ==
|
|
346
361
|
penultimate_inputs.end()) {
|
|
347
362
|
exclude_level = number_levels_ - 1;
|
|
348
|
-
penultimate_output_range_type_ =
|
|
363
|
+
penultimate_output_range_type_ =
|
|
364
|
+
PenultimateOutputRangeType::kNonLastRange;
|
|
349
365
|
break;
|
|
350
366
|
}
|
|
351
367
|
}
|
|
@@ -354,35 +370,6 @@ void Compaction::PopulatePenultimateLevelOutputRange() {
|
|
|
354
370
|
GetBoundaryKeys(input_vstorage_, inputs_,
|
|
355
371
|
&penultimate_level_smallest_user_key_,
|
|
356
372
|
&penultimate_level_largest_user_key_, exclude_level);
|
|
357
|
-
|
|
358
|
-
// If there's a case that the penultimate level output range is overlapping
|
|
359
|
-
// with the existing files, disable the penultimate level output by setting
|
|
360
|
-
// the range to empty. One example is the range delete could have overlap
|
|
361
|
-
// boundary with the next file. (which is actually a false overlap)
|
|
362
|
-
// TODO: Exclude such false overlap, so it won't disable the penultimate
|
|
363
|
-
// output.
|
|
364
|
-
std::set<uint64_t> penultimate_inputs;
|
|
365
|
-
for (const auto& input_lvl : inputs_) {
|
|
366
|
-
if (input_lvl.level == penultimate_level_) {
|
|
367
|
-
for (const auto& file : input_lvl.files) {
|
|
368
|
-
penultimate_inputs.emplace(file->fd.GetNumber());
|
|
369
|
-
}
|
|
370
|
-
}
|
|
371
|
-
}
|
|
372
|
-
|
|
373
|
-
auto penultimate_files = input_vstorage_->LevelFiles(penultimate_level_);
|
|
374
|
-
for (const auto& file : penultimate_files) {
|
|
375
|
-
if (penultimate_inputs.find(file->fd.GetNumber()) ==
|
|
376
|
-
penultimate_inputs.end() &&
|
|
377
|
-
OverlapPenultimateLevelOutputRange(file->smallest.user_key(),
|
|
378
|
-
file->largest.user_key())) {
|
|
379
|
-
// basically disable the penultimate range output. which should be rare
|
|
380
|
-
// or a false overlap caused by range del
|
|
381
|
-
penultimate_level_smallest_user_key_ = "";
|
|
382
|
-
penultimate_level_largest_user_key_ = "";
|
|
383
|
-
penultimate_output_range_type_ = PenultimateOutputRangeType::kDisabled;
|
|
384
|
-
}
|
|
385
|
-
}
|
|
386
373
|
}
|
|
387
374
|
|
|
388
375
|
Compaction::~Compaction() {
|
|
@@ -807,6 +794,16 @@ uint64_t Compaction::MinInputFileOldestAncesterTime(
|
|
|
807
794
|
return min_oldest_ancester_time;
|
|
808
795
|
}
|
|
809
796
|
|
|
797
|
+
uint64_t Compaction::MinInputFileEpochNumber() const {
|
|
798
|
+
uint64_t min_epoch_number = std::numeric_limits<uint64_t>::max();
|
|
799
|
+
for (const auto& inputs_per_level : inputs_) {
|
|
800
|
+
for (const auto& file : inputs_per_level.files) {
|
|
801
|
+
min_epoch_number = std::min(min_epoch_number, file->epoch_number);
|
|
802
|
+
}
|
|
803
|
+
}
|
|
804
|
+
return min_epoch_number;
|
|
805
|
+
}
|
|
806
|
+
|
|
810
807
|
int Compaction::EvaluatePenultimateLevel(
|
|
811
808
|
const VersionStorageInfo* vstorage,
|
|
812
809
|
const ImmutableOptions& immutable_options, const int start_level,
|
|
@@ -378,6 +378,9 @@ class Compaction {
|
|
|
378
378
|
// This is used to filter out some input files' ancester's time range.
|
|
379
379
|
uint64_t MinInputFileOldestAncesterTime(const InternalKey* start,
|
|
380
380
|
const InternalKey* end) const;
|
|
381
|
+
// Return the minimum epoch number among
|
|
382
|
+
// input files' associated with this compaction
|
|
383
|
+
uint64_t MinInputFileEpochNumber() const;
|
|
381
384
|
|
|
382
385
|
// Called by DBImpl::NotifyOnCompactionCompleted to make sure number of
|
|
383
386
|
// compaction begin and compaction completion callbacks match.
|