@nxtedition/rocksdb 12.1.4 → 12.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +1 -1
- package/deps/rocksdb/rocksdb/Makefile +10 -5
- package/deps/rocksdb/rocksdb/TARGETS +9 -7
- package/deps/rocksdb/rocksdb/cache/cache.cc +15 -11
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +26 -0
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +16 -0
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +6 -0
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +38 -8
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +2 -0
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +4 -0
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +11 -0
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +6 -0
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +2 -1
- package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache_test.cc +56 -0
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +12 -9
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +10 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +9 -0
- package/deps/rocksdb/rocksdb/db/c.cc +9 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +12 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +6 -23
- package/deps/rocksdb/rocksdb/db/column_family.h +1 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +4 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +4 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +14 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +19 -16
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +34 -30
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +16 -31
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +7 -50
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +95 -84
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +616 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_state.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +8 -2
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +93 -69
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +353 -89
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +4 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +116 -14
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +67 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +42 -14
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +50 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +79 -32
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +36 -59
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +72 -39
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +14 -12
- package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +75 -0
- package/deps/rocksdb/rocksdb/db/db_iter.cc +7 -3
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +24 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +36 -22
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +23 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/error_handler.cc +28 -3
- package/deps/rocksdb/rocksdb/db/error_handler.h +2 -1
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +165 -33
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +13 -5
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +37 -28
- package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -6
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +7 -6
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +14 -6
- package/deps/rocksdb/rocksdb/db/job_context.h +4 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +24 -14
- package/deps/rocksdb/rocksdb/db/memtable.h +2 -1
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +61 -33
- package/deps/rocksdb/rocksdb/db/memtable_list.h +8 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +4 -2
- package/deps/rocksdb/rocksdb/db/table_cache.cc +2 -0
- package/deps/rocksdb/rocksdb/db/version_builder.cc +14 -11
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +20 -4
- package/deps/rocksdb/rocksdb/db/version_set.cc +40 -30
- package/deps/rocksdb/rocksdb/db/version_set.h +13 -3
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +8 -76
- package/deps/rocksdb/rocksdb/db/write_batch.cc +6 -2
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +5 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +2 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +25 -2
- package/deps/rocksdb/rocksdb/env/fs_remap.cc +11 -0
- package/deps/rocksdb/rocksdb/env/fs_remap.h +5 -0
- package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +11 -1
- package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +3 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +20 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +10 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +30 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +10 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +3 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +287 -83
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +68 -36
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +4 -4
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +31 -0
- package/deps/rocksdb/rocksdb/options/db_options.cc +14 -0
- package/deps/rocksdb/rocksdb/options/db_options.h +2 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +15 -4
- package/deps/rocksdb/rocksdb/options/options_helper.h +4 -0
- package/deps/rocksdb/rocksdb/options/options_parser.cc +5 -4
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +11 -1
- package/deps/rocksdb/rocksdb/options/options_test.cc +38 -45
- package/deps/rocksdb/rocksdb/port/port.h +16 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +8 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +10 -20
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +15 -9
- package/deps/rocksdb/rocksdb/table/format.cc +32 -4
- package/deps/rocksdb/rocksdb/table/format.h +12 -1
- package/deps/rocksdb/rocksdb/table/iterator.cc +4 -0
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +214 -161
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +4 -2
- package/deps/rocksdb/rocksdb/table/table_properties.cc +4 -0
- package/deps/rocksdb/rocksdb/table/table_reader.h +2 -2
- package/deps/rocksdb/rocksdb/table/table_test.cc +5 -4
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +11 -2
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +213 -22
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +3 -0
- package/deps/rocksdb/rocksdb/util/async_file_reader.h +1 -1
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +3 -0
- package/deps/rocksdb/rocksdb/util/coro_utils.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +3 -3
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
|
@@ -473,7 +473,7 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) {
|
|
|
473
473
|
|
|
474
474
|
if (s.ok()) {
|
|
475
475
|
for (auto cfd : *versions_->GetColumnFamilySet()) {
|
|
476
|
-
|
|
476
|
+
EnqueuePendingCompaction(cfd);
|
|
477
477
|
}
|
|
478
478
|
MaybeScheduleFlushOrCompaction();
|
|
479
479
|
}
|
|
@@ -530,6 +530,11 @@ Status DBImpl::MaybeReleaseTimestampedSnapshotsAndCheck() {
|
|
|
530
530
|
return Status::OK();
|
|
531
531
|
}
|
|
532
532
|
|
|
533
|
+
void DBImpl::UntrackDataFiles() {
|
|
534
|
+
TrackOrUntrackFiles(/*existing_data_files=*/{},
|
|
535
|
+
/*track=*/false);
|
|
536
|
+
}
|
|
537
|
+
|
|
533
538
|
Status DBImpl::CloseHelper() {
|
|
534
539
|
// Guarantee that there is no background error recovery in progress before
|
|
535
540
|
// continuing with the shutdown
|
|
@@ -654,8 +659,9 @@ Status DBImpl::CloseHelper() {
|
|
|
654
659
|
// We need to release them before the block cache is destroyed. The block
|
|
655
660
|
// cache may be destroyed inside versions_.reset(), when column family data
|
|
656
661
|
// list is destroyed, so leaving handles in table cache after
|
|
657
|
-
// versions_.reset() may cause issues.
|
|
658
|
-
//
|
|
662
|
+
// versions_.reset() may cause issues. Here we clean all unreferenced handles
|
|
663
|
+
// in table cache, and (for certain builds/conditions) assert that no obsolete
|
|
664
|
+
// files are hanging around unreferenced (leak) in the table/blob file cache.
|
|
659
665
|
// Now we assume all user queries have finished, so only version set itself
|
|
660
666
|
// can possibly hold the blocks from block cache. After releasing unreferenced
|
|
661
667
|
// handles here, only handles held by version set left and inside
|
|
@@ -663,12 +669,22 @@ Status DBImpl::CloseHelper() {
|
|
|
663
669
|
// time a handle is released, we erase it from the cache too. By doing that,
|
|
664
670
|
// we can guarantee that after versions_.reset(), table cache is empty
|
|
665
671
|
// so the cache can be safely destroyed.
|
|
672
|
+
#ifndef NDEBUG
|
|
673
|
+
TEST_VerifyNoObsoleteFilesCached(/*db_mutex_already_held=*/true);
|
|
674
|
+
#endif // !NDEBUG
|
|
666
675
|
table_cache_->EraseUnRefEntries();
|
|
667
676
|
|
|
668
677
|
for (auto& txn_entry : recovered_transactions_) {
|
|
669
678
|
delete txn_entry.second;
|
|
670
679
|
}
|
|
671
680
|
|
|
681
|
+
// Return an unowned SstFileManager to a consistent state
|
|
682
|
+
if (immutable_db_options_.sst_file_manager && !own_sfm_) {
|
|
683
|
+
mutex_.Unlock();
|
|
684
|
+
UntrackDataFiles();
|
|
685
|
+
mutex_.Lock();
|
|
686
|
+
}
|
|
687
|
+
|
|
672
688
|
// versions need to be destroyed before table_cache since it can hold
|
|
673
689
|
// references to table_cache.
|
|
674
690
|
{
|
|
@@ -1526,7 +1542,7 @@ Status DBImpl::FlushWAL(const WriteOptions& write_options, bool sync) {
|
|
|
1526
1542
|
io_s.ToString().c_str());
|
|
1527
1543
|
// In case there is a fs error we should set it globally to prevent the
|
|
1528
1544
|
// future writes
|
|
1529
|
-
|
|
1545
|
+
WALIOStatusCheck(io_s);
|
|
1530
1546
|
// whether sync or not, we should abort the rest of function upon error
|
|
1531
1547
|
return static_cast<Status>(io_s);
|
|
1532
1548
|
}
|
|
@@ -1683,7 +1699,7 @@ IOStatus DBImpl::SyncWalImpl(bool include_current_wal,
|
|
|
1683
1699
|
io_s.ToString().c_str());
|
|
1684
1700
|
// In case there is a fs error we should set it globally to prevent the
|
|
1685
1701
|
// future writes
|
|
1686
|
-
|
|
1702
|
+
WALIOStatusCheck(io_s);
|
|
1687
1703
|
}
|
|
1688
1704
|
if (io_s.ok() && need_wal_dir_sync) {
|
|
1689
1705
|
io_s = directories_.GetWalDir()->FsyncWithDirOptions(
|
|
@@ -2054,15 +2070,19 @@ InternalIterator* DBImpl::NewInternalIterator(
|
|
|
2054
2070
|
bool allow_unprepared_value, ArenaWrappedDBIter* db_iter) {
|
|
2055
2071
|
InternalIterator* internal_iter;
|
|
2056
2072
|
assert(arena != nullptr);
|
|
2073
|
+
auto prefix_extractor =
|
|
2074
|
+
super_version->mutable_cf_options.prefix_extractor.get();
|
|
2057
2075
|
// Need to create internal iterator from the arena.
|
|
2058
2076
|
MergeIteratorBuilder merge_iter_builder(
|
|
2059
2077
|
&cfd->internal_comparator(), arena,
|
|
2060
|
-
|
|
2061
|
-
|
|
2078
|
+
// FIXME? It's not clear what interpretation of prefix seek is needed
|
|
2079
|
+
// here, and no unit test cares about the value provided here.
|
|
2080
|
+
!read_options.total_order_seek && prefix_extractor != nullptr,
|
|
2062
2081
|
read_options.iterate_upper_bound);
|
|
2063
2082
|
// Collect iterator for mutable memtable
|
|
2064
2083
|
auto mem_iter = super_version->mem->NewIterator(
|
|
2065
|
-
read_options, super_version->GetSeqnoToTimeMapping(), arena
|
|
2084
|
+
read_options, super_version->GetSeqnoToTimeMapping(), arena,
|
|
2085
|
+
super_version->mutable_cf_options.prefix_extractor.get());
|
|
2066
2086
|
Status s;
|
|
2067
2087
|
if (!read_options.ignore_range_deletions) {
|
|
2068
2088
|
std::unique_ptr<TruncatedRangeDelIterator> mem_tombstone_iter;
|
|
@@ -2086,6 +2106,7 @@ InternalIterator* DBImpl::NewInternalIterator(
|
|
|
2086
2106
|
if (s.ok()) {
|
|
2087
2107
|
super_version->imm->AddIterators(
|
|
2088
2108
|
read_options, super_version->GetSeqnoToTimeMapping(),
|
|
2109
|
+
super_version->mutable_cf_options.prefix_extractor.get(),
|
|
2089
2110
|
&merge_iter_builder, !read_options.ignore_range_deletions);
|
|
2090
2111
|
}
|
|
2091
2112
|
TEST_SYNC_POINT_CALLBACK("DBImpl::NewInternalIterator:StatusCallback", &s);
|
|
@@ -3210,6 +3231,8 @@ Status DBImpl::MultiGetImpl(
|
|
|
3210
3231
|
s = Status::Aborted();
|
|
3211
3232
|
break;
|
|
3212
3233
|
}
|
|
3234
|
+
// This could be a long-running operation
|
|
3235
|
+
ROCKSDB_THREAD_YIELD_HOOK();
|
|
3213
3236
|
}
|
|
3214
3237
|
|
|
3215
3238
|
// Post processing (decrement reference counts and record statistics)
|
|
@@ -3831,6 +3854,9 @@ Iterator* DBImpl::NewIterator(const ReadOptions& _read_options,
|
|
|
3831
3854
|
}
|
|
3832
3855
|
}
|
|
3833
3856
|
if (read_options.tailing) {
|
|
3857
|
+
read_options.total_order_seek |=
|
|
3858
|
+
immutable_db_options_.prefix_seek_opt_in_only;
|
|
3859
|
+
|
|
3834
3860
|
auto iter = new ForwardIterator(this, read_options, cfd, sv,
|
|
3835
3861
|
/* allow_unprepared_value */ true);
|
|
3836
3862
|
result = NewDBIterator(
|
|
@@ -4032,6 +4058,9 @@ Status DBImpl::NewIterators(
|
|
|
4032
4058
|
|
|
4033
4059
|
assert(cf_sv_pairs.size() == column_families.size());
|
|
4034
4060
|
if (read_options.tailing) {
|
|
4061
|
+
read_options.total_order_seek |=
|
|
4062
|
+
immutable_db_options_.prefix_seek_opt_in_only;
|
|
4063
|
+
|
|
4035
4064
|
for (const auto& cf_sv_pair : cf_sv_pairs) {
|
|
4036
4065
|
auto iter = new ForwardIterator(this, read_options, cf_sv_pair.cfd,
|
|
4037
4066
|
cf_sv_pair.super_version,
|
|
@@ -4282,7 +4311,7 @@ void DBImpl::ReleaseSnapshot(const Snapshot* s) {
|
|
|
4282
4311
|
->storage_info()
|
|
4283
4312
|
->BottommostFilesMarkedForCompaction()
|
|
4284
4313
|
.empty()) {
|
|
4285
|
-
|
|
4314
|
+
EnqueuePendingCompaction(cfd);
|
|
4286
4315
|
MaybeScheduleFlushOrCompaction();
|
|
4287
4316
|
cf_scheduled.push_back(cfd);
|
|
4288
4317
|
}
|
|
@@ -4756,6 +4785,24 @@ void DBImpl::ReleaseFileNumberFromPendingOutputs(
|
|
|
4756
4785
|
}
|
|
4757
4786
|
}
|
|
4758
4787
|
|
|
4788
|
+
std::list<uint64_t>::iterator DBImpl::CaptureOptionsFileNumber() {
|
|
4789
|
+
// We need to remember the iterator of our insert, because after the
|
|
4790
|
+
// compaction is done, we need to remove that element from
|
|
4791
|
+
// min_options_file_numbers_.
|
|
4792
|
+
min_options_file_numbers_.push_back(versions_->options_file_number());
|
|
4793
|
+
auto min_options_file_numbers_inserted_elem = min_options_file_numbers_.end();
|
|
4794
|
+
--min_options_file_numbers_inserted_elem;
|
|
4795
|
+
return min_options_file_numbers_inserted_elem;
|
|
4796
|
+
}
|
|
4797
|
+
|
|
4798
|
+
void DBImpl::ReleaseOptionsFileNumber(
|
|
4799
|
+
std::unique_ptr<std::list<uint64_t>::iterator>& v) {
|
|
4800
|
+
if (v.get() != nullptr) {
|
|
4801
|
+
min_options_file_numbers_.erase(*v.get());
|
|
4802
|
+
v.reset();
|
|
4803
|
+
}
|
|
4804
|
+
}
|
|
4805
|
+
|
|
4759
4806
|
Status DBImpl::GetUpdatesSince(
|
|
4760
4807
|
SequenceNumber seq, std::unique_ptr<TransactionLogIterator>* iter,
|
|
4761
4808
|
const TransactionLogIterator::ReadOptions& read_options) {
|
|
@@ -5836,11 +5883,10 @@ Status DBImpl::IngestExternalFiles(
|
|
|
5836
5883
|
"write_global_seqno is deprecated and does not work with "
|
|
5837
5884
|
"allow_db_generated_files.");
|
|
5838
5885
|
}
|
|
5839
|
-
|
|
5840
|
-
|
|
5841
|
-
|
|
5842
|
-
|
|
5843
|
-
}
|
|
5886
|
+
}
|
|
5887
|
+
if (ingest_opts.move_files && ingest_opts.link_files) {
|
|
5888
|
+
return Status::InvalidArgument(
|
|
5889
|
+
"`move_files` and `link_files` can not both be true.");
|
|
5844
5890
|
}
|
|
5845
5891
|
}
|
|
5846
5892
|
|
|
@@ -6748,6 +6794,62 @@ void DBImpl::RecordSeqnoToTimeMapping(uint64_t populate_historical_seconds) {
|
|
|
6748
6794
|
}
|
|
6749
6795
|
}
|
|
6750
6796
|
|
|
6797
|
+
void DBImpl::TrackOrUntrackFiles(
|
|
6798
|
+
const std::vector<std::string>& existing_data_files, bool track) {
|
|
6799
|
+
auto sfm = static_cast_with_check<SstFileManagerImpl>(
|
|
6800
|
+
immutable_db_options_.sst_file_manager.get());
|
|
6801
|
+
assert(sfm);
|
|
6802
|
+
std::vector<ColumnFamilyMetaData> metadata;
|
|
6803
|
+
GetAllColumnFamilyMetaData(&metadata);
|
|
6804
|
+
auto action = [&](const std::string& file_path,
|
|
6805
|
+
std::optional<uint64_t> size) {
|
|
6806
|
+
if (track) {
|
|
6807
|
+
if (size) {
|
|
6808
|
+
sfm->OnAddFile(file_path, *size).PermitUncheckedError();
|
|
6809
|
+
} else {
|
|
6810
|
+
sfm->OnAddFile(file_path).PermitUncheckedError();
|
|
6811
|
+
}
|
|
6812
|
+
} else {
|
|
6813
|
+
sfm->OnUntrackFile(file_path).PermitUncheckedError();
|
|
6814
|
+
}
|
|
6815
|
+
};
|
|
6816
|
+
|
|
6817
|
+
std::unordered_set<std::string> referenced_files;
|
|
6818
|
+
for (const auto& md : metadata) {
|
|
6819
|
+
for (const auto& lmd : md.levels) {
|
|
6820
|
+
for (const auto& fmd : lmd.files) {
|
|
6821
|
+
// We're assuming that each sst file name exists in at most one of
|
|
6822
|
+
// the paths.
|
|
6823
|
+
std::string file_path =
|
|
6824
|
+
fmd.directory + kFilePathSeparator + fmd.relative_filename;
|
|
6825
|
+
action(file_path, fmd.size);
|
|
6826
|
+
referenced_files.insert(file_path);
|
|
6827
|
+
}
|
|
6828
|
+
}
|
|
6829
|
+
for (const auto& bmd : md.blob_files) {
|
|
6830
|
+
std::string name = bmd.blob_file_name;
|
|
6831
|
+
// The BlobMetaData.blob_file_name may start with "/".
|
|
6832
|
+
if (!name.empty() && name[0] == kFilePathSeparator) {
|
|
6833
|
+
name = name.substr(1);
|
|
6834
|
+
}
|
|
6835
|
+
// We're assuming that each blob file name exists in at most one of
|
|
6836
|
+
// the paths.
|
|
6837
|
+
std::string file_path = bmd.blob_file_path + kFilePathSeparator + name;
|
|
6838
|
+
action(file_path, bmd.blob_file_size);
|
|
6839
|
+
referenced_files.insert(file_path);
|
|
6840
|
+
}
|
|
6841
|
+
}
|
|
6842
|
+
|
|
6843
|
+
for (const auto& file_path : existing_data_files) {
|
|
6844
|
+
if (referenced_files.find(file_path) != referenced_files.end()) {
|
|
6845
|
+
continue;
|
|
6846
|
+
}
|
|
6847
|
+
// There shouldn't be any duplicated files. In case there is, SstFileManager
|
|
6848
|
+
// will take care of deduping it.
|
|
6849
|
+
action(file_path, /*size=*/std::nullopt);
|
|
6850
|
+
}
|
|
6851
|
+
}
|
|
6852
|
+
|
|
6751
6853
|
void DBImpl::InstallSeqnoToTimeMappingInSV(
|
|
6752
6854
|
std::vector<SuperVersionContext>* sv_contexts) {
|
|
6753
6855
|
mutex_.AssertHeld();
|
|
@@ -853,6 +853,8 @@ class DBImpl : public DB {
|
|
|
853
853
|
|
|
854
854
|
uint64_t GetObsoleteSstFilesSize();
|
|
855
855
|
|
|
856
|
+
uint64_t MinOptionsFileNumberToKeep();
|
|
857
|
+
|
|
856
858
|
// Returns the list of live files in 'live' and the list
|
|
857
859
|
// of all files in the filesystem in 'candidate_files'.
|
|
858
860
|
// If force == false and the last call was less than
|
|
@@ -1151,6 +1153,8 @@ class DBImpl : public DB {
|
|
|
1151
1153
|
// Get the background error status
|
|
1152
1154
|
Status TEST_GetBGError();
|
|
1153
1155
|
|
|
1156
|
+
bool TEST_IsRecoveryInProgress();
|
|
1157
|
+
|
|
1154
1158
|
// Return the maximum overlapping data (in bytes) at next level for any
|
|
1155
1159
|
// file at a level >= 1.
|
|
1156
1160
|
uint64_t TEST_MaxNextLevelOverlappingBytes(
|
|
@@ -1237,9 +1241,14 @@ class DBImpl : public DB {
|
|
|
1237
1241
|
static Status TEST_ValidateOptions(const DBOptions& db_options) {
|
|
1238
1242
|
return ValidateOptions(db_options);
|
|
1239
1243
|
}
|
|
1240
|
-
|
|
1241
1244
|
#endif // NDEBUG
|
|
1242
1245
|
|
|
1246
|
+
// In certain configurations, verify that the table/blob file cache only
|
|
1247
|
+
// contains entries for live files, to check for effective leaks of open
|
|
1248
|
+
// files. This can only be called when purging of obsolete files has
|
|
1249
|
+
// "settled," such as during parts of DB Close().
|
|
1250
|
+
void TEST_VerifyNoObsoleteFilesCached(bool db_mutex_already_held) const;
|
|
1251
|
+
|
|
1243
1252
|
// persist stats to column family "_persistent_stats"
|
|
1244
1253
|
void PersistStats();
|
|
1245
1254
|
|
|
@@ -1582,11 +1591,12 @@ class DBImpl : public DB {
|
|
|
1582
1591
|
|
|
1583
1592
|
virtual bool OwnTablesAndLogs() const { return true; }
|
|
1584
1593
|
|
|
1585
|
-
//
|
|
1594
|
+
// Read/create DB identity file (as appropriate), and write DB ID to
|
|
1595
|
+
// version_edit if provided.
|
|
1586
1596
|
Status SetupDBId(const WriteOptions& write_options, bool read_only,
|
|
1587
|
-
|
|
1588
|
-
// Assign db_id_ and write DB ID to
|
|
1589
|
-
void SetDBId(std::string&& id, bool read_only,
|
|
1597
|
+
bool is_new_db, VersionEdit* version_edit);
|
|
1598
|
+
// Assign db_id_ and write DB ID to version_edit if provided.
|
|
1599
|
+
void SetDBId(std::string&& id, bool read_only, VersionEdit* version_edit);
|
|
1590
1600
|
|
|
1591
1601
|
// Collect a deduplicated collection of paths used by this DB, including
|
|
1592
1602
|
// dbname_, DBOptions.db_paths, ColumnFamilyOptions.cf_paths.
|
|
@@ -1616,9 +1626,15 @@ class DBImpl : public DB {
|
|
|
1616
1626
|
// vast majority of all files), since it already has the file size
|
|
1617
1627
|
// on record, we don't need to query the file system. Otherwise, we query the
|
|
1618
1628
|
// file system for the size of an unreferenced file.
|
|
1629
|
+
// REQUIRES: mutex unlocked
|
|
1619
1630
|
void TrackExistingDataFiles(
|
|
1620
1631
|
const std::vector<std::string>& existing_data_files);
|
|
1621
1632
|
|
|
1633
|
+
// Untrack data files in sst manager. This is only called during DB::Close on
|
|
1634
|
+
// an unowned SstFileManager, to return it to a consistent state.
|
|
1635
|
+
// REQUIRES: mutex unlocked
|
|
1636
|
+
void UntrackDataFiles();
|
|
1637
|
+
|
|
1622
1638
|
// SetDbSessionId() should be called in the constructor DBImpl()
|
|
1623
1639
|
// to ensure that db_session_id_ gets updated every time the DB is opened
|
|
1624
1640
|
void SetDbSessionId();
|
|
@@ -1685,6 +1701,8 @@ class DBImpl : public DB {
|
|
|
1685
1701
|
friend class XFTransactionWriteHandler;
|
|
1686
1702
|
friend class DBBlobIndexTest;
|
|
1687
1703
|
friend class WriteUnpreparedTransactionTest_RecoveryTest_Test;
|
|
1704
|
+
friend class CompactionServiceTest_PreservedOptionsLocalCompaction_Test;
|
|
1705
|
+
friend class CompactionServiceTest_PreservedOptionsRemoteCompaction_Test;
|
|
1688
1706
|
#endif
|
|
1689
1707
|
|
|
1690
1708
|
struct CompactionState;
|
|
@@ -1956,6 +1974,13 @@ class DBImpl : public DB {
|
|
|
1956
1974
|
void ReleaseFileNumberFromPendingOutputs(
|
|
1957
1975
|
std::unique_ptr<std::list<uint64_t>::iterator>& v);
|
|
1958
1976
|
|
|
1977
|
+
// Similar to pending_outputs, preserve OPTIONS file. Used for remote
|
|
1978
|
+
// compaction.
|
|
1979
|
+
std::list<uint64_t>::iterator CaptureOptionsFileNumber();
|
|
1980
|
+
void ReleaseOptionsFileNumber(
|
|
1981
|
+
std::unique_ptr<std::list<uint64_t>::iterator>& v);
|
|
1982
|
+
|
|
1983
|
+
// Sets bg error if there is an error writing to WAL.
|
|
1959
1984
|
IOStatus SyncClosedWals(const WriteOptions& write_options,
|
|
1960
1985
|
JobContext* job_context, VersionEdit* synced_wals,
|
|
1961
1986
|
bool error_recovery_in_prog);
|
|
@@ -2174,7 +2199,7 @@ class DBImpl : public DB {
|
|
|
2174
2199
|
|
|
2175
2200
|
// Used by WriteImpl to update bg_error_ when IO error happens, e.g., write
|
|
2176
2201
|
// WAL, sync WAL fails, if paranoid check is enabled.
|
|
2177
|
-
void
|
|
2202
|
+
void WALIOStatusCheck(const IOStatus& status);
|
|
2178
2203
|
|
|
2179
2204
|
// Used by WriteImpl to update bg_error_ in case of memtable insert error.
|
|
2180
2205
|
void MemTableInsertStatusCheck(const Status& memtable_insert_status);
|
|
@@ -2187,6 +2212,10 @@ class DBImpl : public DB {
|
|
|
2187
2212
|
JobContext* job_context, LogBuffer* log_buffer,
|
|
2188
2213
|
CompactionJobInfo* compaction_job_info);
|
|
2189
2214
|
|
|
2215
|
+
// REQUIRES: mutex unlocked
|
|
2216
|
+
void TrackOrUntrackFiles(const std::vector<std::string>& existing_data_files,
|
|
2217
|
+
bool track);
|
|
2218
|
+
|
|
2190
2219
|
ColumnFamilyData* GetColumnFamilyDataByName(const std::string& cf_name);
|
|
2191
2220
|
|
|
2192
2221
|
void MaybeScheduleFlushOrCompaction();
|
|
@@ -2216,10 +2245,27 @@ class DBImpl : public DB {
|
|
|
2216
2245
|
void GenerateFlushRequest(const autovector<ColumnFamilyData*>& cfds,
|
|
2217
2246
|
FlushReason flush_reason, FlushRequest* req);
|
|
2218
2247
|
|
|
2248
|
+
// Below functions are for executing flush, compaction in the background. A
|
|
2249
|
+
// deque is the communication channel between threads that ask for the work
|
|
2250
|
+
// to be done and the available threads in the thread pool that pick it up to
|
|
2251
|
+
// execute it. We use these terminologies to describe the state of the work
|
|
2252
|
+
// and its transitions:
|
|
2253
|
+
// 1) It becomes pending once it's successfully enqueued into the
|
|
2254
|
+
// corresponding deque; work in this state is also called unscheduled.
|
|
2255
|
+
// Counter `unscheduled_*_` counts work in this state.
|
|
2256
|
+
// 2) When `MaybeScheduleFlushOrCompaction` schedules a thread to run `BGWork*`
|
|
2257
|
+
// for the work, it becomes scheduled
|
|
2258
|
+
// Counter `bg_*_scheduled_` counts work in this state.
|
|
2259
|
+
// 3) Once the thread starts to execute `BGWork*`, the work is popped from the
|
|
2260
|
+
// deque; it is now in the running state.
|
|
2261
|
+
// Counter `num_running_*_` counts work in this state.
|
|
2262
|
+
// 4) Eventually, the work is finished. We don't need to specifically track
|
|
2263
|
+
// finished work.
|
|
2264
|
+
|
|
2219
2265
|
// Returns true if `req` is successfully enqueued.
|
|
2220
|
-
bool
|
|
2266
|
+
bool EnqueuePendingFlush(const FlushRequest& req);
|
|
2221
2267
|
|
|
2222
|
-
void
|
|
2268
|
+
void EnqueuePendingCompaction(ColumnFamilyData* cfd);
|
|
2223
2269
|
void SchedulePendingPurge(std::string fname, std::string dir_to_sync,
|
|
2224
2270
|
FileType type, uint64_t number, int job_id);
|
|
2225
2271
|
static void BGWorkCompaction(void* arg);
|
|
@@ -2724,6 +2770,11 @@ class DBImpl : public DB {
|
|
|
2724
2770
|
// State is protected with db mutex.
|
|
2725
2771
|
std::list<uint64_t> pending_outputs_;
|
|
2726
2772
|
|
|
2773
|
+
// Similar to pending_outputs_, FindObsoleteFiles()/PurgeObsoleteFiles() never
|
|
2774
|
+
// delete any OPTIONS file whose number is bigger than any of the file numbers
|
|
2775
|
+
// in min_options_file_numbers_.
|
|
2776
|
+
std::list<uint64_t> min_options_file_numbers_;
|
|
2777
|
+
|
|
2727
2778
|
// flush_queue_ and compaction_queue_ hold column families that we need to
|
|
2728
2779
|
// flush and compact, respectively.
|
|
2729
2780
|
// A column family is inserted into flush_queue_ when it satisfies condition
|
|
@@ -2946,6 +2997,14 @@ DBOptions SanitizeOptions(const std::string& db, const DBOptions& src,
|
|
|
2946
2997
|
CompressionType GetCompressionFlush(const ImmutableCFOptions& ioptions,
|
|
2947
2998
|
const MutableCFOptions& mutable_cf_options);
|
|
2948
2999
|
|
|
3000
|
+
// Return a VersionEdit for the DB's recovery when the `memtables` of the
|
|
3001
|
+
// specified column family are obsolete. Specifically, the min log number to
|
|
3002
|
+
// keep, and the WAL files that can be deleted.
|
|
3003
|
+
VersionEdit GetDBRecoveryEditForObsoletingMemTables(
|
|
3004
|
+
VersionSet* vset, const ColumnFamilyData& cfd,
|
|
3005
|
+
const autovector<VersionEdit*>& edit_list,
|
|
3006
|
+
const autovector<MemTable*>& memtables, LogsWithPrepTracker* prep_tracker);
|
|
3007
|
+
|
|
2949
3008
|
// Return the earliest log file to keep after the memtable flush is
|
|
2950
3009
|
// finalized.
|
|
2951
3010
|
// `cfd_to_flush` is the column family whose memtable (specified in
|
|
@@ -1561,6 +1561,12 @@ Status DBImpl::CompactFilesImpl(
|
|
|
1561
1561
|
|
|
1562
1562
|
compaction_job.Prepare();
|
|
1563
1563
|
|
|
1564
|
+
std::unique_ptr<std::list<uint64_t>::iterator> min_options_file_number_elem;
|
|
1565
|
+
if (immutable_db_options().compaction_service != nullptr) {
|
|
1566
|
+
min_options_file_number_elem.reset(
|
|
1567
|
+
new std::list<uint64_t>::iterator(CaptureOptionsFileNumber()));
|
|
1568
|
+
}
|
|
1569
|
+
|
|
1564
1570
|
mutex_.Unlock();
|
|
1565
1571
|
TEST_SYNC_POINT("CompactFilesImpl:0");
|
|
1566
1572
|
TEST_SYNC_POINT("CompactFilesImpl:1");
|
|
@@ -1570,6 +1576,10 @@ Status DBImpl::CompactFilesImpl(
|
|
|
1570
1576
|
TEST_SYNC_POINT("CompactFilesImpl:3");
|
|
1571
1577
|
mutex_.Lock();
|
|
1572
1578
|
|
|
1579
|
+
if (immutable_db_options().compaction_service != nullptr) {
|
|
1580
|
+
ReleaseOptionsFileNumber(min_options_file_number_elem);
|
|
1581
|
+
}
|
|
1582
|
+
|
|
1573
1583
|
bool compaction_released = false;
|
|
1574
1584
|
Status status =
|
|
1575
1585
|
compaction_job.Install(*c->mutable_cf_options(), &compaction_released);
|
|
@@ -1880,7 +1890,7 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
|
|
|
1880
1890
|
Status status = versions_->LogAndApply(cfd, mutable_cf_options,
|
|
1881
1891
|
read_options, write_options, &edit,
|
|
1882
1892
|
&mutex_, directories_.GetDbDir());
|
|
1883
|
-
|
|
1893
|
+
c->MarkFilesBeingCompacted(false);
|
|
1884
1894
|
cfd->compaction_picker()->UnregisterCompaction(c.get());
|
|
1885
1895
|
c.reset();
|
|
1886
1896
|
|
|
@@ -2377,7 +2387,7 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
|
|
|
2377
2387
|
ColumnFamilyData* loop_cfd =
|
|
2378
2388
|
req.cfd_to_max_mem_id_to_persist.begin()->first;
|
|
2379
2389
|
bool already_queued_for_flush = loop_cfd->queued_for_flush();
|
|
2380
|
-
bool flush_req_enqueued =
|
|
2390
|
+
bool flush_req_enqueued = EnqueuePendingFlush(req);
|
|
2381
2391
|
if (already_queued_for_flush || flush_req_enqueued) {
|
|
2382
2392
|
loop_cfd->SetFlushSkipReschedule();
|
|
2383
2393
|
}
|
|
@@ -2528,7 +2538,7 @@ Status DBImpl::AtomicFlushMemTables(
|
|
|
2528
2538
|
}
|
|
2529
2539
|
}
|
|
2530
2540
|
GenerateFlushRequest(cfds, flush_reason, &flush_req);
|
|
2531
|
-
|
|
2541
|
+
EnqueuePendingFlush(flush_req);
|
|
2532
2542
|
MaybeScheduleFlushOrCompaction();
|
|
2533
2543
|
}
|
|
2534
2544
|
|
|
@@ -2583,7 +2593,7 @@ Status DBImpl::RetryFlushesForErrorRecovery(FlushReason flush_reason,
|
|
|
2583
2593
|
if (immutable_db_options_.atomic_flush) {
|
|
2584
2594
|
FlushRequest flush_req;
|
|
2585
2595
|
GenerateFlushRequest(cfds, flush_reason, &flush_req);
|
|
2586
|
-
|
|
2596
|
+
EnqueuePendingFlush(flush_req);
|
|
2587
2597
|
for (auto& iter : flush_req.cfd_to_max_mem_id_to_persist) {
|
|
2588
2598
|
flush_memtable_ids.push_back(iter.second);
|
|
2589
2599
|
}
|
|
@@ -2597,7 +2607,7 @@ Status DBImpl::RetryFlushesForErrorRecovery(FlushReason flush_reason,
|
|
|
2597
2607
|
flush_reason,
|
|
2598
2608
|
{{cfd,
|
|
2599
2609
|
std::numeric_limits<uint64_t>::max() /* max_mem_id_to_persist */}}};
|
|
2600
|
-
if (
|
|
2610
|
+
if (EnqueuePendingFlush(flush_req)) {
|
|
2601
2611
|
cfd->SetFlushSkipReschedule();
|
|
2602
2612
|
};
|
|
2603
2613
|
}
|
|
@@ -2950,6 +2960,7 @@ void DBImpl::AddToCompactionQueue(ColumnFamilyData* cfd) {
|
|
|
2950
2960
|
cfd->Ref();
|
|
2951
2961
|
compaction_queue_.push_back(cfd);
|
|
2952
2962
|
cfd->set_queued_for_compaction(true);
|
|
2963
|
+
++unscheduled_compactions_;
|
|
2953
2964
|
}
|
|
2954
2965
|
|
|
2955
2966
|
ColumnFamilyData* DBImpl::PopFirstFromCompactionQueue() {
|
|
@@ -3005,7 +3016,7 @@ ColumnFamilyData* DBImpl::PickCompactionFromQueue(
|
|
|
3005
3016
|
return cfd;
|
|
3006
3017
|
}
|
|
3007
3018
|
|
|
3008
|
-
bool DBImpl::
|
|
3019
|
+
bool DBImpl::EnqueuePendingFlush(const FlushRequest& flush_req) {
|
|
3009
3020
|
mutex_.AssertHeld();
|
|
3010
3021
|
bool enqueued = false;
|
|
3011
3022
|
if (reject_new_background_jobs_) {
|
|
@@ -3041,16 +3052,15 @@ bool DBImpl::SchedulePendingFlush(const FlushRequest& flush_req) {
|
|
|
3041
3052
|
return enqueued;
|
|
3042
3053
|
}
|
|
3043
3054
|
|
|
3044
|
-
void DBImpl::
|
|
3055
|
+
// Queues cfd for compaction when it is not already queued and
// cfd->NeedsCompaction() returns true. The function asserts that mutex_ is
// held and does nothing when reject_new_background_jobs_ is set.
//
// Reading note for this diff: the rows marked with a minus sign are the
// previous version. The increment of unscheduled_compactions_ is removed from
// this function, and the AddToCompactionQueue hunk earlier in the diff adds
// the same increment, so the counter is now bumped inside
// AddToCompactionQueue rather than here.
void DBImpl::EnqueuePendingCompaction(ColumnFamilyData* cfd) {
|
|
3045
3056
|
mutex_.AssertHeld();
|
|
3046
3057
|
if (reject_new_background_jobs_) {
|
|
3047
3058
|
return;
|
|
3048
3059
|
}
|
|
3049
3060
|
// The queued_for_compaction() check keeps a column family from being added
// to the queue a second time.
if (!cfd->queued_for_compaction() && cfd->NeedsCompaction()) {
|
|
3050
|
-
TEST_SYNC_POINT_CALLBACK("
|
|
3061
|
+
TEST_SYNC_POINT_CALLBACK("EnqueuePendingCompaction::cfd",
|
|
3051
3062
|
static_cast<void*>(cfd));
|
|
3052
3063
|
AddToCompactionQueue(cfd);
|
|
3053
|
-
++unscheduled_compactions_;
|
|
3054
3064
|
}
|
|
3055
3065
|
}
|
|
3056
3066
|
|
|
@@ -3218,7 +3228,7 @@ Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context,
|
|
|
3218
3228
|
#ifndef NDEBUG
|
|
3219
3229
|
flush_req.reschedule_count += 1;
|
|
3220
3230
|
#endif /* !NDEBUG */
|
|
3221
|
-
|
|
3231
|
+
EnqueuePendingFlush(flush_req);
|
|
3222
3232
|
*reason = flush_reason;
|
|
3223
3233
|
*flush_rescheduled_to_retain_udt = true;
|
|
3224
3234
|
return Status::TryAgain();
|
|
@@ -3541,6 +3551,14 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3541
3551
|
is_manual && manual_compaction->disallow_trivial_move;
|
|
3542
3552
|
|
|
3543
3553
|
CompactionJobStats compaction_job_stats;
|
|
3554
|
+
// Set is_remote_compaction to true on CompactionBegin Event if
|
|
3555
|
+
// compaction_service is set except for trivial moves. We do not know whether
|
|
3556
|
+
// remote compaction will actually be successfully scheduled, or fall back to
|
|
3557
|
+
// local at this time. CompactionCompleted event will tell the truth where
|
|
3558
|
+
// the compaction actually happened.
|
|
3559
|
+
compaction_job_stats.is_remote_compaction =
|
|
3560
|
+
immutable_db_options().compaction_service != nullptr;
|
|
3561
|
+
|
|
3544
3562
|
Status status;
|
|
3545
3563
|
if (!error_handler_.IsBGWorkStopped()) {
|
|
3546
3564
|
if (shutting_down_.load(std::memory_order_acquire)) {
|
|
@@ -3678,7 +3696,6 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3678
3696
|
->ComputeCompactionScore(*(c->immutable_options()),
|
|
3679
3697
|
*(c->mutable_cf_options()));
|
|
3680
3698
|
AddToCompactionQueue(cfd);
|
|
3681
|
-
++unscheduled_compactions_;
|
|
3682
3699
|
|
|
3683
3700
|
c.reset();
|
|
3684
3701
|
// Don't need to sleep here, because BackgroundCallCompaction
|
|
@@ -3707,7 +3724,6 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3707
3724
|
if (cfd->NeedsCompaction()) {
|
|
3708
3725
|
// Yes, we need more compactions!
|
|
3709
3726
|
AddToCompactionQueue(cfd);
|
|
3710
|
-
++unscheduled_compactions_;
|
|
3711
3727
|
MaybeScheduleFlushOrCompaction();
|
|
3712
3728
|
}
|
|
3713
3729
|
}
|
|
@@ -3768,6 +3784,8 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3768
3784
|
ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_COMPACTION);
|
|
3769
3785
|
|
|
3770
3786
|
compaction_job_stats.num_input_files = c->num_input_files(0);
|
|
3787
|
+
// Trivial moves do not get compacted remotely
|
|
3788
|
+
compaction_job_stats.is_remote_compaction = false;
|
|
3771
3789
|
|
|
3772
3790
|
NotifyOnCompactionBegin(c->column_family_data(), c.get(), status,
|
|
3773
3791
|
compaction_job_stats, job_context->job_id);
|
|
@@ -3903,6 +3921,12 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3903
3921
|
&bg_bottom_compaction_scheduled_);
|
|
3904
3922
|
compaction_job.Prepare();
|
|
3905
3923
|
|
|
3924
|
+
std::unique_ptr<std::list<uint64_t>::iterator> min_options_file_number_elem;
|
|
3925
|
+
if (immutable_db_options().compaction_service != nullptr) {
|
|
3926
|
+
min_options_file_number_elem.reset(
|
|
3927
|
+
new std::list<uint64_t>::iterator(CaptureOptionsFileNumber()));
|
|
3928
|
+
}
|
|
3929
|
+
|
|
3906
3930
|
NotifyOnCompactionBegin(c->column_family_data(), c.get(), status,
|
|
3907
3931
|
compaction_job_stats, job_context->job_id);
|
|
3908
3932
|
mutex_.Unlock();
|
|
@@ -3912,6 +3936,11 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3912
3936
|
compaction_job.Run().PermitUncheckedError();
|
|
3913
3937
|
TEST_SYNC_POINT("DBImpl::BackgroundCompaction:NonTrivial:AfterRun");
|
|
3914
3938
|
mutex_.Lock();
|
|
3939
|
+
|
|
3940
|
+
if (immutable_db_options().compaction_service != nullptr) {
|
|
3941
|
+
ReleaseOptionsFileNumber(min_options_file_number_elem);
|
|
3942
|
+
}
|
|
3943
|
+
|
|
3915
3944
|
status =
|
|
3916
3945
|
compaction_job.Install(*c->mutable_cf_options(), &compaction_released);
|
|
3917
3946
|
io_s = compaction_job.io_status();
|
|
@@ -3997,7 +4026,6 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3997
4026
|
*(c->mutable_cf_options()));
|
|
3998
4027
|
if (!cfd->queued_for_compaction()) {
|
|
3999
4028
|
AddToCompactionQueue(cfd);
|
|
4000
|
-
++unscheduled_compactions_;
|
|
4001
4029
|
}
|
|
4002
4030
|
}
|
|
4003
4031
|
}
|
|
@@ -4269,7 +4297,7 @@ void DBImpl::InstallSuperVersionAndScheduleWork(
|
|
|
4269
4297
|
|
|
4270
4298
|
// Whenever we install new SuperVersion, we might need to issue new flushes or
|
|
4271
4299
|
// compactions.
|
|
4272
|
-
|
|
4300
|
+
EnqueuePendingCompaction(cfd);
|
|
4273
4301
|
MaybeScheduleFlushOrCompaction();
|
|
4274
4302
|
|
|
4275
4303
|
// Update max_total_in_memory_state_
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
#ifndef NDEBUG
|
|
11
11
|
|
|
12
|
+
#include "db/blob/blob_file_cache.h"
|
|
12
13
|
#include "db/column_family.h"
|
|
13
14
|
#include "db/db_impl/db_impl.h"
|
|
14
15
|
#include "db/error_handler.h"
|
|
@@ -199,6 +200,11 @@ Status DBImpl::TEST_GetBGError() {
|
|
|
199
200
|
return error_handler_.GetBGError();
|
|
200
201
|
}
|
|
201
202
|
|
|
203
|
+
// Test hook: returns error_handler_.IsRecoveryInProgress(), querying the
// error handler while an InstrumentedMutexLock on mutex_ is alive.
// NOTE(review): the visible hunks place this code between #ifndef NDEBUG and
// #endif, so it is presumably compiled only in debug builds - confirm.
bool DBImpl::TEST_IsRecoveryInProgress() {
|
|
204
|
+
// Lock object lives until the function returns, covering the query below.
InstrumentedMutexLock l(&mutex_);
|
|
205
|
+
return error_handler_.IsRecoveryInProgress();
|
|
206
|
+
}
|
|
207
|
+
|
|
202
208
|
// Test hook: calls mutex_.Lock() and returns without unlocking; presumably
// paired with TEST_UnlockMutex() by the caller.
void DBImpl::TEST_LockMutex() { mutex_.Lock(); }
|
|
203
209
|
|
|
204
210
|
// Test hook: calls mutex_.Unlock(); presumably releases a lock taken earlier
// through TEST_LockMutex().
void DBImpl::TEST_UnlockMutex() { mutex_.Unlock(); }
|
|
@@ -323,5 +329,49 @@ size_t DBImpl::TEST_EstimateInMemoryStatsHistorySize() const {
|
|
|
323
329
|
InstrumentedMutexLock l(&const_cast<DBImpl*>(this)->stats_history_mutex_);
|
|
324
330
|
return EstimateInMemoryStatsHistorySize();
|
|
325
331
|
}
|
|
332
|
+
|
|
333
|
+
// Test hook: asserts that every entry in table_cache_ whose helper equals
// BlobFileCache::GetHelper() is keyed by a file number found in the live-file
// list collected from the current version of each non-dropped column family.
//
// db_mutex_already_held: when true the function only asserts that mutex_ is
// held; when false it locks mutex_ itself through the optional lock below.
//
// Violations surface through assert(); nothing is returned to the caller.
void DBImpl::TEST_VerifyNoObsoleteFilesCached(
|
|
334
|
+
bool db_mutex_already_held) const {
|
|
335
|
+
// This check is somewhat expensive and obscure to make a part of every
|
|
336
|
+
// unit test in every build variety. Thus, we only enable it for ASAN builds.
|
|
337
|
+
if (!kMustFreeHeapAllocations) {
|
|
338
|
+
return;
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
// Engaged only when this function has to take mutex_ itself.
std::optional<InstrumentedMutexLock> l;
|
|
342
|
+
if (db_mutex_already_held) {
|
|
343
|
+
mutex_.AssertHeld();
|
|
344
|
+
} else {
|
|
345
|
+
l.emplace(&mutex_);
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
std::vector<uint64_t> live_files;
|
|
349
|
+
for (auto cfd : *versions_->GetColumnFamilySet()) {
|
|
350
|
+
if (cfd->IsDropped()) {
|
|
351
|
+
continue;
|
|
352
|
+
}
|
|
353
|
+
// Sneakily add both SST and blob files to the same list
|
|
354
|
+
cfd->current()->AddLiveFiles(&live_files, &live_files);
|
|
355
|
+
}
|
|
356
|
+
// Sorted so that the callback below can look numbers up with
// std::binary_search.
std::sort(live_files.begin(), live_files.end());
|
|
357
|
+
|
|
358
|
+
// Callback handed to ApplyToAllEntries: it ignores entries that do not carry
// the blob file helper and asserts that the file number stored in the key of
// every remaining entry is present in live_files.
auto fn = [&live_files](const Slice& key, Cache::ObjectPtr, size_t,
|
|
359
|
+
const Cache::CacheItemHelper* helper) {
|
|
360
|
+
if (helper != BlobFileCache::GetHelper()) {
|
|
361
|
+
// Skip non-blob files for now
|
|
362
|
+
// FIXME: diagnose and fix the leaks of obsolete SST files revealed in
|
|
363
|
+
// unit tests.
|
|
364
|
+
return;
|
|
365
|
+
}
|
|
366
|
+
// See TableCache and BlobFileCache
|
|
367
|
+
assert(key.size() == sizeof(uint64_t));
|
|
368
|
+
uint64_t file_number;
|
|
369
|
+
// The key bytes are read as one uint64_t file number; GetUnaligned is
// presumably used because key.data() carries no alignment guarantee.
GetUnaligned(reinterpret_cast<const uint64_t*>(key.data()), &file_number);
|
|
370
|
+
// Assert file is in sorted live_files
|
|
371
|
+
assert(
|
|
372
|
+
std::binary_search(live_files.begin(), live_files.end(), file_number));
|
|
373
|
+
};
|
|
374
|
+
// NOTE(review): the second argument is default-constructed here; confirm its
// meaning against the ApplyToAllEntries declaration.
table_cache_->ApplyToAllEntries(fn, {});
|
|
375
|
+
}
|
|
326
376
|
} // namespace ROCKSDB_NAMESPACE
|
|
327
377
|
#endif // NDEBUG
|
|
@@ -47,7 +47,7 @@ Status DBImpl::SuggestCompactRange(ColumnFamilyHandle* column_family,
|
|
|
47
47
|
// compaction score
|
|
48
48
|
vstorage->ComputeCompactionScore(*cfd->ioptions(),
|
|
49
49
|
*cfd->GetLatestMutableCFOptions());
|
|
50
|
-
|
|
50
|
+
EnqueuePendingCompaction(cfd);
|
|
51
51
|
MaybeScheduleFlushOrCompaction();
|
|
52
52
|
}
|
|
53
53
|
return Status::OK();
|