@nxtedition/rocksdb 15.1.2 → 15.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +15 -0
- package/binding.cc +79 -38
- package/build.sh +1 -2
- package/deps/rocksdb/rocksdb/BUCK +10 -8
- package/deps/rocksdb/rocksdb/CMakeLists.txt +27 -2
- package/deps/rocksdb/rocksdb/Makefile +27 -116
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +101 -124
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +47 -30
- package/deps/rocksdb/rocksdb/db/c.cc +793 -131
- package/deps/rocksdb/rocksdb/db/c_test.c +571 -0
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +226 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +4 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +95 -59
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +45 -35
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +8 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +8 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +47 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +82 -0
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +1 -1
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +69 -24
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +9 -1
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +65 -0
- package/deps/rocksdb/rocksdb/db/db_etc3_test.cc +161 -0
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +20 -7
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +13 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +114 -39
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +3 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +39 -25
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +361 -0
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +35 -0
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +83 -0
- package/deps/rocksdb/rocksdb/db/db_test.cc +249 -4
- package/deps/rocksdb/rocksdb/db/db_test2.cc +3 -0
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +2 -1
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +3 -2
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +7 -7
- package/deps/rocksdb/rocksdb/db/listener_test.cc +7 -17
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +4 -2
- package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +41 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +2 -2
- package/deps/rocksdb/rocksdb/db/version_edit.h +7 -4
- package/deps/rocksdb/rocksdb/db/version_set.cc +299 -90
- package/deps/rocksdb/rocksdb/db/version_set.h +56 -9
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +41 -39
- package/deps/rocksdb/rocksdb/db/version_util.h +3 -2
- package/deps/rocksdb/rocksdb/db/wal_manager.cc +7 -1
- package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +48 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +5 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +16 -5
- package/deps/rocksdb/rocksdb/env/env_test.cc +126 -41
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +14 -7
- package/deps/rocksdb/rocksdb/env/io_posix.cc +304 -112
- package/deps/rocksdb/rocksdb/env/io_posix.h +16 -4
- package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
- package/deps/rocksdb/rocksdb/folly.mk +148 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +29 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +73 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +246 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +0 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +15 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +19 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +6 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +67 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +1 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +6 -14
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +8 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_mirror.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h +0 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/option_change_migration.h +33 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +6 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +2 -0
- package/deps/rocksdb/rocksdb/monitoring/thread_status_impl.cc +5 -2
- package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +2 -2
- package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.h +6 -6
- package/deps/rocksdb/rocksdb/monitoring/thread_status_updater_debug.cc +2 -2
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +10 -5
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util.h +2 -2
- package/deps/rocksdb/rocksdb/options/cf_options.cc +15 -3
- package/deps/rocksdb/rocksdb/options/cf_options.h +7 -0
- package/deps/rocksdb/rocksdb/options/db_options.cc +27 -36
- package/deps/rocksdb/rocksdb/options/db_options.h +3 -2
- package/deps/rocksdb/rocksdb/options/options.cc +4 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +8 -2
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +4 -1
- package/deps/rocksdb/rocksdb/options/options_test.cc +19 -3
- package/deps/rocksdb/rocksdb/src.mk +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +155 -32
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +7 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +169 -125
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +22 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +43 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +9 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +9 -8
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +17 -0
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +15 -5
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +13 -18
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +29 -0
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +15 -0
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +79 -19
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +48 -20
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +51 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +19 -0
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +1 -1
- package/deps/rocksdb/rocksdb/table/external_table.cc +2 -2
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +3 -2
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +3 -1
- package/deps/rocksdb/rocksdb/table/table_builder.h +5 -0
- package/deps/rocksdb/rocksdb/table/table_reader.h +4 -2
- package/deps/rocksdb/rocksdb/table/table_test.cc +48 -39
- package/deps/rocksdb/rocksdb/test_util/sync_point.cc +4 -0
- package/deps/rocksdb/rocksdb/test_util/sync_point.h +32 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.h +6 -2
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +14 -4
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +8 -5
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +3 -2
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +63 -12
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +16 -1
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +5 -1
- package/deps/rocksdb/rocksdb/util/bit_fields.h +133 -23
- package/deps/rocksdb/rocksdb/util/bloom_test.cc +2 -5
- package/deps/rocksdb/rocksdb/util/compression.cc +51 -23
- package/deps/rocksdb/rocksdb/util/compression_test.cc +525 -270
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +3 -4
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +11 -2
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -1
- package/deps/rocksdb/rocksdb/util/slice_test.cc +92 -0
- package/deps/rocksdb/rocksdb/util/thread_list_test.cc +2 -2
- package/deps/rocksdb/rocksdb/util/thread_operation.h +2 -2
- package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +19 -2
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +75 -0
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +303 -111
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +379 -0
- package/deps/rocksdb/rocksdb.gyp +1 -0
- package/iterator.js +66 -70
- package/package.json +6 -6
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/deps/rocksdb/rocksdb/table/block_based/index_builder_test.cc +0 -183
|
@@ -11541,6 +11541,71 @@ TEST_F(DBCompactionTest, RecordNewestKeyTimeForTtlCompaction) {
|
|
|
11541
11541
|
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
|
|
11542
11542
|
}
|
|
11543
11543
|
|
|
11544
|
+
// Test verifies compaction file cutting logic when using tail size estimation
|
|
11545
|
+
// maintains output files at or below the target file size.
|
|
11546
|
+
TEST_F(DBCompactionTest, CompactionRespectsTargetSizeWithTailEstimation) {
|
|
11547
|
+
const int kInitialKeyCount = 10000; // 10k keys
|
|
11548
|
+
const int kValueSize = 100; // 100 bytes per key
|
|
11549
|
+
const int kSeed = 301;
|
|
11550
|
+
|
|
11551
|
+
Options options = CurrentOptions();
|
|
11552
|
+
options.target_file_size_is_upper_bound = true;
|
|
11553
|
+
options.target_file_size_base = 256 * 1024;
|
|
11554
|
+
options.write_buffer_size = 2 * 1024 * 1024;
|
|
11555
|
+
options.level0_file_num_compaction_trigger = 100; // Never trigger L0->L1
|
|
11556
|
+
options.compression = kNoCompression;
|
|
11557
|
+
|
|
11558
|
+
BlockBasedTableOptions table_options;
|
|
11559
|
+
table_options.partition_filters = true;
|
|
11560
|
+
table_options.metadata_block_size = 4 * 1024;
|
|
11561
|
+
table_options.index_type = BlockBasedTableOptions::kBinarySearch;
|
|
11562
|
+
table_options.filter_policy.reset(NewBloomFilterPolicy(10));
|
|
11563
|
+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
11564
|
+
|
|
11565
|
+
DestroyAndReopen(options);
|
|
11566
|
+
|
|
11567
|
+
// Generate 2 L0 files
|
|
11568
|
+
// Generate first file with 10k keys (each ~100 bytes) approx 1.2MB total
|
|
11569
|
+
Random rnd(kSeed);
|
|
11570
|
+
for (int i = 0; i < kInitialKeyCount; i++) {
|
|
11571
|
+
ASSERT_OK(Put(Key(i), rnd.RandomString(kValueSize)));
|
|
11572
|
+
}
|
|
11573
|
+
ASSERT_OK(Flush());
|
|
11574
|
+
|
|
11575
|
+
// Generate second file with overlapping keys to force compaction (prevent
|
|
11576
|
+
// trivial move)
|
|
11577
|
+
for (int i = kInitialKeyCount / 2; i < kInitialKeyCount * 1.5; i++) {
|
|
11578
|
+
ASSERT_OK(Put(Key(i), rnd.RandomString(kValueSize)));
|
|
11579
|
+
}
|
|
11580
|
+
ASSERT_OK(Flush());
|
|
11581
|
+
|
|
11582
|
+
// Capture file metadata and assert two L0 files
|
|
11583
|
+
std::vector<LiveFileMetaData> file_metadata;
|
|
11584
|
+
db_->GetLiveFilesMetaData(&file_metadata);
|
|
11585
|
+
ASSERT_EQ(file_metadata.size(), 2);
|
|
11586
|
+
for (const auto& file : file_metadata) {
|
|
11587
|
+
ASSERT_EQ(file.level, 0);
|
|
11588
|
+
};
|
|
11589
|
+
|
|
11590
|
+
// Manually compact LO files to L1
|
|
11591
|
+
CompactRangeOptions cro;
|
|
11592
|
+
cro.change_level = true;
|
|
11593
|
+
cro.target_level = 1;
|
|
11594
|
+
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
|
|
11595
|
+
|
|
11596
|
+
ASSERT_OK(dbfull()->TEST_WaitForCompact());
|
|
11597
|
+
|
|
11598
|
+
// Verify that compacted output files are under target file size
|
|
11599
|
+
for (const auto& file : file_metadata) {
|
|
11600
|
+
if (file.level > 0) {
|
|
11601
|
+
EXPECT_LE(file.size, options.target_file_size_base)
|
|
11602
|
+
<< "Output file size exceeds target size: " << " File: " << file.name
|
|
11603
|
+
<< " level: " << file.level << " File size: " << file.size
|
|
11604
|
+
<< " Target size: " << options.target_file_size_base;
|
|
11605
|
+
}
|
|
11606
|
+
}
|
|
11607
|
+
}
|
|
11608
|
+
|
|
11544
11609
|
class PeriodicCompactionListener : public EventListener {
|
|
11545
11610
|
public:
|
|
11546
11611
|
explicit PeriodicCompactionListener() {}
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
2
|
+
// This source code is licensed under both the GPLv2 (found in the
|
|
3
|
+
// COPYING file in the root directory) and Apache 2.0 License
|
|
4
|
+
// (found in the LICENSE.Apache file in the root directory).
|
|
5
|
+
|
|
6
|
+
#include "db/db_test_util.h"
|
|
7
|
+
|
|
8
|
+
namespace ROCKSDB_NAMESPACE {
|
|
9
|
+
|
|
10
|
+
// Test fixture for the tests in this file. It derives from DBTestBase and
// constructs it with the "db_etc3_test" name and env_do_fsync set to true.
class DBEtc3Test : public DBTestBase {
|
|
11
|
+
public:
|
|
12
|
+
DBEtc3Test() : DBTestBase("db_etc3_test", /*env_do_fsync=*/true) {}
|
|
13
|
+
};
|
|
14
|
+
|
|
15
|
+
// With both manifest size limits set to zero, checks that the current
// manifest file number increases after a flush and again after a reopen, and
// that the values written beforehand are still readable afterwards. Repeated
// for every configuration produced by ChangeCompactOptions().
TEST_F(DBEtc3Test, ManifestRollOver) {
  const std::string kValue1(1000, '1');
  const std::string kValue2(1000, '2');
  const std::string kValue3(1000, '3');

  do {
    Options options;
    // Force new manifest on each manifest write
    options.max_manifest_file_size = 0;
    options.max_manifest_space_amp_pct = 0;
    options = CurrentOptions(options);
    CreateAndReopenWithCF({"pikachu"}, options);

    ASSERT_OK(Put(1, "key1", kValue1));
    ASSERT_OK(Put(1, "key2", kValue2));
    ASSERT_OK(Put(1, "key3", kValue3));

    const uint64_t manifest_before_flush =
        dbfull()->TEST_Current_Manifest_FileNo();
    ASSERT_OK(Flush(1));  // This should trigger LogAndApply.
    const uint64_t manifest_after_flush =
        dbfull()->TEST_Current_Manifest_FileNo();
    ASSERT_GT(manifest_after_flush, manifest_before_flush);

    // Re-open should always re-create manifest file
    ReopenWithColumnFamilies({"default", "pikachu"}, options);
    ASSERT_GT(dbfull()->TEST_Current_Manifest_FileNo(), manifest_after_flush);

    ASSERT_EQ(kValue1, Get(1, "key1"));
    ASSERT_EQ(kValue2, Get(1, "key2"));
    ASSERT_EQ(kValue3, Get(1, "key3"));
  } while (ChangeCompactOptions());
}
|
|
40
|
+
|
|
41
|
+
// Exercises manifest rotation under `max_manifest_file_size` and
// `max_manifest_space_amp_pct`, including changing both dynamically through
// SetDBOptions(). After each manifest write the test compares the manifest
// file number before and after: equal means "not rotated", greater means
// "rotated".
TEST_F(DBEtc3Test, AutoTuneManifestSize) {
  // Ensure we have auto-tuning beyond max_manifest_file_size by default
  ASSERT_EQ(DBOptions{}.max_manifest_space_amp_pct, 500);

  Options options = CurrentOptions();
  ASSERT_OK(db_->SetOptions({{"level0_file_num_compaction_trigger", "20"}}));

  // Use large column family names to essentially control the amount of payload
  // data needed for the manifest file. Drop manifest entries don't include the
  // CF name so are small.
  uint64_t prev_manifest_num = 0, cur_manifest_num = 0;
  std::deque<ColumnFamilyHandle*> handles;
  int counter = 5;

  // Shifts the last observed manifest number into `prev_manifest_num` and
  // samples the current one.
  auto RecordManifestNum = [&]() {
    prev_manifest_num = cur_manifest_num;
    cur_manifest_num = dbfull()->TEST_Current_Manifest_FileNo();
  };
  // Creates a column family with a 1000-character name.
  auto AddCfFn = [&]() {
    std::string name = "cf" + std::to_string(counter++);
    name.resize(1000, 'a');
    ASSERT_OK(db_->CreateColumnFamily(options, name, &handles.emplace_back()));
    RecordManifestNum();
  };
  // Drops and destroys the oldest column family still held in `handles`.
  auto DropCfFn = [&]() {
    ASSERT_OK(db_->DropColumnFamily(handles.front()));
    ASSERT_OK(db_->DestroyColumnFamilyHandle(handles.front()));
    handles.pop_front();
    RecordManifestNum();
  };
  // Writes one key and flushes.
  auto TrivialManifestWriteFn = [&]() {
    ASSERT_OK(Put("x", std::to_string(counter++)));
    ASSERT_OK(Flush());
    RecordManifestNum();
  };

  options.max_manifest_file_size = 1000000;
  options.max_manifest_space_amp_pct = 0;  // no auto-tuning yet
  DestroyAndReopen(options);

  // With the generous (minimum) maximum manifest size, should not be rotated
  for (int n = 0; n < 3; ++n) {
    AddCfFn();
  }
  ASSERT_EQ(prev_manifest_num, cur_manifest_num);

  // Change options for small max and (still) no auto-tuning
  ASSERT_OK(db_->SetDBOptions({{"max_manifest_file_size", "3000"}}));

  // Takes effect on the next manifest write
  TrivialManifestWriteFn();
  ASSERT_LT(prev_manifest_num, cur_manifest_num);

  // Now we have to rewrite the whole manifest on each write because the
  // compacted size exceeds the "max" size.
  AddCfFn();
  ASSERT_LT(prev_manifest_num, cur_manifest_num);
  DropCfFn();
  ASSERT_LT(prev_manifest_num, cur_manifest_num);
  AddCfFn();
  ASSERT_LT(prev_manifest_num, cur_manifest_num);
  TrivialManifestWriteFn();
  ASSERT_LT(prev_manifest_num, cur_manifest_num);

  // Enabling auto-tuning should fix this, immediately for next manifest writes.
  // This will allow up to double-ish the size of the compacted manifest,
  // which last should have been 4000 + some bytes.
  ASSERT_EQ(handles.size(), 4U);
  ASSERT_OK(db_->SetDBOptions({{"max_manifest_space_amp_pct", "105"}}));

  // After 9 CF names should be enough to rotate the manifest
  for (int round = 0; round < 5; ++round) {
    // Drop on the 1st, 3rd and 5th rounds only.
    if ((round % 2) == 0) {
      DropCfFn();
    }
    AddCfFn();
    ASSERT_EQ(prev_manifest_num, cur_manifest_num);
  }
  TrivialManifestWriteFn();
  ASSERT_LT(prev_manifest_num, cur_manifest_num);

  // We now have a different last compacted manifest size, should be
  // able to go beyond 9 CFs named in manifest this time.
  ASSERT_EQ(handles.size(), 6U);

  for (int n = 0; n < 2; ++n) {
    DropCfFn();
  }
  for (int round = 0; round < 4; ++round) {
    DropCfFn();
    AddCfFn();
    ASSERT_EQ(prev_manifest_num, cur_manifest_num);
  }
  // We've written 10 named CFs to the manifest. We should be able to
  // dynamically change the auto-tuning still based on the last "compacted"
  // manifest size of 7000 + some bytes.
  ASSERT_OK(db_->SetDBOptions({{"max_manifest_space_amp_pct", "51"}}));
  TrivialManifestWriteFn();
  ASSERT_LT(prev_manifest_num, cur_manifest_num);
  // And the "compacted" manifest size has reset again, so should be changed
  // again sooner.
  ASSERT_EQ(handles.size(), 4U);
  for (int round = 0; round < 2; ++round) {
    AddCfFn();
    ASSERT_EQ(prev_manifest_num, cur_manifest_num);
  }
  // Enough for manifest change
  AddCfFn();
  ASSERT_LT(prev_manifest_num, cur_manifest_num);

  // Wrap up
  while (!handles.empty()) {
    DropCfFn();
  }
}
|
|
153
|
+
|
|
154
|
+
} // namespace ROCKSDB_NAMESPACE
|
|
155
|
+
|
|
156
|
+
// Test binary entry point: installs the stack trace handler, initializes
// GoogleTest from the command line, calls RegisterCustomObjects() with the
// same arguments, and returns the result of RUN_ALL_TESTS().
int main(int argc, char** argv) {
|
|
157
|
+
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
|
|
158
|
+
::testing::InitGoogleTest(&argc, argv);
|
|
159
|
+
RegisterCustomObjects(argc, argv);
|
|
160
|
+
return RUN_ALL_TESTS();
|
|
161
|
+
}
|
|
@@ -109,6 +109,7 @@ Status DBImpl::GetSortedWalFilesImpl(VectorWalPtr& files, bool need_seqnos) {
|
|
|
109
109
|
{
|
|
110
110
|
InstrumentedMutexLock l(&mutex_);
|
|
111
111
|
while (pending_purge_obsolete_files_ > 0 || bg_purge_scheduled_ > 0) {
|
|
112
|
+
TEST_SYNC_POINT("DBImpl::GetSortedWalFilesImpl:WaitPurge");
|
|
112
113
|
bg_cv_.Wait();
|
|
113
114
|
}
|
|
114
115
|
|
|
@@ -258,10 +258,10 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
|
|
|
258
258
|
[this]() { this->TriggerPeriodicCompaction(); });
|
|
259
259
|
|
|
260
260
|
versions_.reset(new VersionSet(
|
|
261
|
-
dbname_, &immutable_db_options_,
|
|
262
|
-
write_buffer_manager_, &write_controller_,
|
|
263
|
-
io_tracer_, db_id_, db_session_id_,
|
|
264
|
-
&error_handler_, read_only));
|
|
261
|
+
dbname_, &immutable_db_options_, mutable_db_options_, file_options_,
|
|
262
|
+
table_cache_.get(), write_buffer_manager_, &write_controller_,
|
|
263
|
+
&block_cache_tracer_, io_tracer_, db_id_, db_session_id_,
|
|
264
|
+
options.daily_offpeak_time_utc, &error_handler_, read_only));
|
|
265
265
|
column_family_memtables_.reset(
|
|
266
266
|
new ColumnFamilyMemTablesImpl(versions_->GetColumnFamilySet()));
|
|
267
267
|
|
|
@@ -1412,7 +1412,7 @@ Status DBImpl::SetDBOptions(
|
|
|
1412
1412
|
file_options_for_compaction_ = FileOptions(new_db_options);
|
|
1413
1413
|
file_options_for_compaction_ = fs_->OptimizeForCompactionTableWrite(
|
|
1414
1414
|
file_options_for_compaction_, immutable_db_options_);
|
|
1415
|
-
versions_->
|
|
1415
|
+
versions_->UpdatedMutableDbOptions(mutable_db_options_, &mutex_);
|
|
1416
1416
|
// TODO(xiez): clarify why apply optimize for read to write options
|
|
1417
1417
|
file_options_for_compaction_ = fs_->OptimizeForCompactionTableRead(
|
|
1418
1418
|
file_options_for_compaction_, immutable_db_options_);
|
|
@@ -5047,6 +5047,19 @@ void DBImpl::GetColumnFamilyMetaData(ColumnFamilyHandle* column_family,
|
|
|
5047
5047
|
}
|
|
5048
5048
|
}
|
|
5049
5049
|
|
|
5050
|
+
// Overload taking GetColumnFamilyMetaDataOptions: fills `metadata` by
// forwarding `options` to the column family's current Version.
// `column_family` must be non-null (asserted).
void DBImpl::GetColumnFamilyMetaData(
    ColumnFamilyHandle* column_family,
    const GetColumnFamilyMetaDataOptions& options,
    ColumnFamilyMetaData* metadata) {
  assert(column_family);
  auto* const handle_impl =
      static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
  auto* const cfd = handle_impl->cfd();

  // The current Version is queried while holding the DB mutex; the lock is
  // released when this function returns.
  InstrumentedMutexLock l(&mutex_);
  cfd->current()->GetColumnFamilyMetaData(options, metadata);
}
|
|
5062
|
+
|
|
5050
5063
|
void DBImpl::GetAllColumnFamilyMetaData(
|
|
5051
5064
|
std::vector<ColumnFamilyMetaData>* metadata) {
|
|
5052
5065
|
InstrumentedMutexLock l(&mutex_);
|
|
@@ -5506,7 +5519,7 @@ Status DBImpl::RenameTempFileToOptionsFile(const std::string& file_name,
|
|
|
5506
5519
|
return s;
|
|
5507
5520
|
}
|
|
5508
5521
|
|
|
5509
|
-
#
|
|
5522
|
+
#ifndef NROCKSDB_THREAD_STATUS
|
|
5510
5523
|
|
|
5511
5524
|
void DBImpl::NewThreadStatusCfInfo(ColumnFamilyData* cfd) const {
|
|
5512
5525
|
if (immutable_db_options_.enable_thread_tracking) {
|
|
@@ -5533,7 +5546,7 @@ void DBImpl::NewThreadStatusCfInfo(ColumnFamilyData* /*cfd*/) const {}
|
|
|
5533
5546
|
void DBImpl::EraseThreadStatusCfInfo(ColumnFamilyData* /*cfd*/) const {}
|
|
5534
5547
|
|
|
5535
5548
|
void DBImpl::EraseThreadStatusDbInfo() const {}
|
|
5536
|
-
#endif //
|
|
5549
|
+
#endif // !NROCKSDB_THREAD_STATUS
|
|
5537
5550
|
|
|
5538
5551
|
//
|
|
5539
5552
|
// A global method that can dump out the build version
|
|
@@ -573,6 +573,11 @@ class DBImpl : public DB {
|
|
|
573
573
|
void GetColumnFamilyMetaData(ColumnFamilyHandle* column_family,
|
|
574
574
|
ColumnFamilyMetaData* metadata) override;
|
|
575
575
|
|
|
576
|
+
// Get column family metadata with filtering based on key range and level
|
|
577
|
+
void GetColumnFamilyMetaData(ColumnFamilyHandle* column_family,
|
|
578
|
+
const GetColumnFamilyMetaDataOptions& options,
|
|
579
|
+
ColumnFamilyMetaData* metadata) override;
|
|
580
|
+
|
|
576
581
|
void GetAllColumnFamilyMetaData(
|
|
577
582
|
std::vector<ColumnFamilyMetaData>* metadata) override;
|
|
578
583
|
|
|
@@ -2396,6 +2401,14 @@ class DBImpl : public DB {
|
|
|
2396
2401
|
JobContext* job_context, LogBuffer* log_buffer,
|
|
2397
2402
|
CompactionJobInfo* compaction_job_info);
|
|
2398
2403
|
|
|
2404
|
+
// Helper function to perform trivial move by updating manifest metadata
|
|
2405
|
+
// without rewriting data files. This is called when IsTrivialMove() is true.
|
|
2406
|
+
// REQUIRES: mutex held
|
|
2407
|
+
// Returns: Status of the trivial move operation
// `compaction_released` is set to true when the callback passed to
// LogAndApply() has released the compaction's files; it is not modified
// otherwise.
// `moved_files` and `moved_bytes` are incremented (not reset) by the number
// and total size of the files moved to the output level.
|
|
2408
|
+
Status PerformTrivialMove(Compaction& c, LogBuffer* log_buffer,
|
|
2409
|
+
bool& compaction_released, size_t& moved_files,
|
|
2410
|
+
size_t& moved_bytes);
|
|
2411
|
+
|
|
2399
2412
|
// REQUIRES: mutex unlocked
|
|
2400
2413
|
void TrackOrUntrackFiles(const std::vector<std::string>& existing_data_files,
|
|
2401
2414
|
bool track);
|
|
@@ -1424,6 +1424,56 @@ Status DBImpl::CompactFiles(const CompactionOptions& compact_options,
|
|
|
1424
1424
|
return s;
|
|
1425
1425
|
}
|
|
1426
1426
|
|
|
1427
|
+
// Performs a trivial move for compaction `c`: every input file that is not
// already on the output level is re-registered at the output level through a
// manifest edit, without rewriting any data.
//
// REQUIRES: mutex_ held.
//
// `compaction_released` is set to true once the LogAndApply() callback has
// released the compaction's files. `moved_files` and `moved_bytes` are
// incremented by the count and total size of the moved files.
// Returns the status of the manifest write.
Status DBImpl::PerformTrivialMove(Compaction& c, LogBuffer* log_buffer,
                                  bool& compaction_released,
                                  size_t& moved_files, size_t& moved_bytes) {
  mutex_.AssertHeld();

  const std::string& cf_name = c.column_family_data()->GetName();
  const int output_level = c.output_level();

  ROCKS_LOG_BUFFER(log_buffer, "[%s] Moving %d files to level-%d\n",
                   cf_name.c_str(), static_cast<int>(c.num_input_files(0)),
                   output_level);

  // Move files to the output level by editing the manifest
  for (unsigned int level_idx = 0; level_idx < c.num_input_levels();
       level_idx++) {
    const int input_level = c.level(level_idx);
    if (input_level == output_level) {
      // Files already on the output level stay where they are.
      continue;
    }
    const size_t num_files = c.num_input_files(level_idx);
    for (size_t file_idx = 0; file_idx < num_files; file_idx++) {
      FileMetaData* f = c.input(level_idx, file_idx);
      c.edit()->DeleteFile(input_level, f->fd.GetNumber());
      c.edit()->AddFile(output_level, f->fd.GetNumber(), f->fd.GetPathId(),
                        f->fd.GetFileSize(), f->smallest, f->largest,
                        f->fd.smallest_seqno, f->fd.largest_seqno,
                        f->marked_for_compaction, f->temperature,
                        f->oldest_blob_file_number, f->oldest_ancester_time,
                        f->file_creation_time, f->epoch_number,
                        f->file_checksum, f->file_checksum_func_name,
                        f->unique_id, f->compensated_range_deletion_size,
                        f->tail_size, f->user_defined_timestamps_persisted);
      moved_bytes += static_cast<size_t>(f->fd.GetFileSize());
      ROCKS_LOG_BUFFER(
          log_buffer, "[%s] Moved #%" PRIu64 " to level-%d %" PRIu64 " bytes\n",
          cf_name.c_str(), f->fd.GetNumber(), output_level,
          f->fd.GetFileSize());
    }
    moved_files += num_files;
  }

  // Install the new version
  const ReadOptions read_options(Env::IOActivity::kCompaction);
  const WriteOptions write_options(Env::IOActivity::kCompaction);
  auto release_compaction_files = [&c, &compaction_released](const Status& s) {
    c.ReleaseCompactionFiles(s);
    compaction_released = true;
  };
  return versions_->LogAndApply(
      c.column_family_data(), read_options, write_options, c.edit(), &mutex_,
      directories_.GetDbDir(), /*new_descriptor_log=*/false,
      /*column_family_options=*/nullptr, release_compaction_files);
}
|
|
1476
|
+
|
|
1427
1477
|
Status DBImpl::CompactFilesImpl(
|
|
1428
1478
|
const CompactionOptions& compact_options, ColumnFamilyData* cfd,
|
|
1429
1479
|
Version* version, const std::vector<std::string>& input_file_names,
|
|
@@ -1511,6 +1561,63 @@ Status DBImpl::CompactFilesImpl(
|
|
|
1511
1561
|
// deletion compaction currently not allowed in CompactFiles.
|
|
1512
1562
|
assert(!c->deletion_compaction());
|
|
1513
1563
|
|
|
1564
|
+
// Check if this can be a trivial move (metadata-only update)
|
|
1565
|
+
// Similar to the logic in DBImpl::BackgroundCompaction
|
|
1566
|
+
// Note: We disable trivial move when compaction_service is present because
|
|
1567
|
+
// the service expects all compactions to go through CompactionJob for
|
|
1568
|
+
// tracking
|
|
1569
|
+
bool is_trivial_move = compact_options.allow_trivial_move &&
|
|
1570
|
+
c->IsTrivialMove() &&
|
|
1571
|
+
immutable_db_options().compaction_service == nullptr;
|
|
1572
|
+
|
|
1573
|
+
if (is_trivial_move) {
|
|
1574
|
+
// Perform trivial move: just update manifest without rewriting data
|
|
1575
|
+
TEST_SYNC_POINT("DBImpl::CompactFilesImpl:TrivialMove");
|
|
1576
|
+
|
|
1577
|
+
bool compaction_released = false;
|
|
1578
|
+
size_t moved_files = 0;
|
|
1579
|
+
size_t moved_bytes = 0;
|
|
1580
|
+
Status status = PerformTrivialMove(
|
|
1581
|
+
*c.get(), log_buffer, compaction_released, moved_files, moved_bytes);
|
|
1582
|
+
|
|
1583
|
+
if (status.ok()) {
|
|
1584
|
+
InstallSuperVersionAndScheduleWork(
|
|
1585
|
+
c->column_family_data(), job_context->superversion_contexts.data());
|
|
1586
|
+
|
|
1587
|
+
// Populate output file names for trivial move
|
|
1588
|
+
if (output_file_names != nullptr) {
|
|
1589
|
+
for (const auto& newf : c->edit()->GetNewFiles()) {
|
|
1590
|
+
output_file_names->push_back(TableFileName(
|
|
1591
|
+
c->immutable_options().cf_paths, newf.second.fd.GetNumber(),
|
|
1592
|
+
newf.second.fd.GetPathId()));
|
|
1593
|
+
}
|
|
1594
|
+
}
|
|
1595
|
+
|
|
1596
|
+
ROCKS_LOG_BUFFER(
|
|
1597
|
+
log_buffer,
|
|
1598
|
+
"[%s] Trivial move succeeded for %zu files, %zu bytes total\n",
|
|
1599
|
+
c->column_family_data()->GetName().c_str(), moved_files, moved_bytes);
|
|
1600
|
+
} else {
|
|
1601
|
+
if (!compaction_released) {
|
|
1602
|
+
c->ReleaseCompactionFiles(status);
|
|
1603
|
+
}
|
|
1604
|
+
ROCKS_LOG_BUFFER(log_buffer, "[%s] Trivial move failed: %s\n",
|
|
1605
|
+
c->column_family_data()->GetName().c_str(),
|
|
1606
|
+
status.ToString().c_str());
|
|
1607
|
+
error_handler_.SetBGError(status, BackgroundErrorReason::kCompaction);
|
|
1608
|
+
}
|
|
1609
|
+
|
|
1610
|
+
c.reset();
|
|
1611
|
+
bg_compaction_scheduled_--;
|
|
1612
|
+
if (bg_compaction_scheduled_ == 0) {
|
|
1613
|
+
bg_cv_.SignalAll();
|
|
1614
|
+
}
|
|
1615
|
+
MaybeScheduleFlushOrCompaction();
|
|
1616
|
+
|
|
1617
|
+
return status;
|
|
1618
|
+
}
|
|
1619
|
+
|
|
1620
|
+
// Not a trivial move, proceed with full compaction
|
|
1514
1621
|
InitSnapshotContext(job_context);
|
|
1515
1622
|
|
|
1516
1623
|
std::unique_ptr<std::list<uint64_t>::iterator> pending_outputs_inserted_elem(
|
|
@@ -4074,35 +4181,6 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
4074
4181
|
NotifyOnCompactionBegin(c->column_family_data(), c.get(), status,
|
|
4075
4182
|
compaction_job_stats, job_context->job_id);
|
|
4076
4183
|
|
|
4077
|
-
// Move files to next level
|
|
4078
|
-
int32_t moved_files = 0;
|
|
4079
|
-
int64_t moved_bytes = 0;
|
|
4080
|
-
for (unsigned int l = 0; l < c->num_input_levels(); l++) {
|
|
4081
|
-
if (c->level(l) == c->output_level()) {
|
|
4082
|
-
continue;
|
|
4083
|
-
}
|
|
4084
|
-
for (size_t i = 0; i < c->num_input_files(l); i++) {
|
|
4085
|
-
FileMetaData* f = c->input(l, i);
|
|
4086
|
-
c->edit()->DeleteFile(c->level(l), f->fd.GetNumber());
|
|
4087
|
-
c->edit()->AddFile(
|
|
4088
|
-
c->output_level(), f->fd.GetNumber(), f->fd.GetPathId(),
|
|
4089
|
-
f->fd.GetFileSize(), f->smallest, f->largest, f->fd.smallest_seqno,
|
|
4090
|
-
f->fd.largest_seqno, f->marked_for_compaction, f->temperature,
|
|
4091
|
-
f->oldest_blob_file_number, f->oldest_ancester_time,
|
|
4092
|
-
f->file_creation_time, f->epoch_number, f->file_checksum,
|
|
4093
|
-
f->file_checksum_func_name, f->unique_id,
|
|
4094
|
-
f->compensated_range_deletion_size, f->tail_size,
|
|
4095
|
-
f->user_defined_timestamps_persisted);
|
|
4096
|
-
|
|
4097
|
-
ROCKS_LOG_BUFFER(
|
|
4098
|
-
log_buffer,
|
|
4099
|
-
"[%s] Moving #%" PRIu64 " to level-%d %" PRIu64 " bytes\n",
|
|
4100
|
-
c->column_family_data()->GetName().c_str(), f->fd.GetNumber(),
|
|
4101
|
-
c->output_level(), f->fd.GetFileSize());
|
|
4102
|
-
++moved_files;
|
|
4103
|
-
moved_bytes += f->fd.GetFileSize();
|
|
4104
|
-
}
|
|
4105
|
-
}
|
|
4106
4184
|
if (c->compaction_reason() == CompactionReason::kLevelMaxLevelSize &&
|
|
4107
4185
|
c->immutable_options().compaction_pri == kRoundRobin) {
|
|
4108
4186
|
int start_level = c->start_level();
|
|
@@ -4113,14 +4191,12 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
4113
4191
|
vstorage->GetNextCompactCursor(start_level, c->num_input_files(0)));
|
|
4114
4192
|
}
|
|
4115
4193
|
}
|
|
4116
|
-
|
|
4117
|
-
|
|
4118
|
-
|
|
4119
|
-
|
|
4120
|
-
|
|
4121
|
-
|
|
4122
|
-
compaction_released = true;
|
|
4123
|
-
});
|
|
4194
|
+
|
|
4195
|
+
// Perform the trivial move
|
|
4196
|
+
size_t moved_files = 0;
|
|
4197
|
+
size_t moved_bytes = 0;
|
|
4198
|
+
status = PerformTrivialMove(*c.get(), log_buffer, compaction_released,
|
|
4199
|
+
moved_files, moved_bytes);
|
|
4124
4200
|
io_s = versions_->io_status();
|
|
4125
4201
|
InstallSuperVersionAndScheduleWork(
|
|
4126
4202
|
c->column_family_data(), job_context->superversion_contexts.data());
|
|
@@ -4135,8 +4211,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
4135
4211
|
<< "total_files_size" << moved_bytes;
|
|
4136
4212
|
}
|
|
4137
4213
|
ROCKS_LOG_BUFFER(
|
|
4138
|
-
log_buffer,
|
|
4139
|
-
"[%s] Moved #%d files to level-%d %" PRIu64 " bytes %s: %s\n",
|
|
4214
|
+
log_buffer, "[%s] Moved #%d files to level-%zu %zu bytes %s: %s\n",
|
|
4140
4215
|
c->column_family_data()->GetName().c_str(), moved_files,
|
|
4141
4216
|
c->output_level(), moved_bytes, status.ToString().c_str(),
|
|
4142
4217
|
c->column_family_data()->current()->storage_info()->LevelSummary(&tmp));
|
|
@@ -267,6 +267,9 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
|
|
|
267
267
|
if (!job_context->HaveSomethingToDelete()) {
|
|
268
268
|
mutex_.AssertHeld();
|
|
269
269
|
--pending_purge_obsolete_files_;
|
|
270
|
+
if (pending_purge_obsolete_files_ == 0) {
|
|
271
|
+
bg_cv_.SignalAll();
|
|
272
|
+
}
|
|
270
273
|
}
|
|
271
274
|
});
|
|
272
275
|
|
|
@@ -293,9 +293,9 @@ Status DB::OpenAsFollower(
|
|
|
293
293
|
DBImplFollower* impl =
|
|
294
294
|
new DBImplFollower(tmp_opts, std::move(new_env), dbname, src_path);
|
|
295
295
|
impl->versions_.reset(new ReactiveVersionSet(
|
|
296
|
-
dbname, &impl->immutable_db_options_, impl->
|
|
297
|
-
impl->table_cache_.get(),
|
|
298
|
-
&impl->write_controller_, impl->io_tracer_));
|
|
296
|
+
dbname, &impl->immutable_db_options_, impl->mutable_db_options_,
|
|
297
|
+
impl->file_options_, impl->table_cache_.get(),
|
|
298
|
+
impl->write_buffer_manager_, &impl->write_controller_, impl->io_tracer_));
|
|
299
299
|
impl->column_family_memtables_.reset(
|
|
300
300
|
new ColumnFamilyMemTablesImpl(impl->versions_->GetColumnFamilySet()));
|
|
301
301
|
impl->wal_in_db_path_ = impl->immutable_db_options_.IsWalDirSameAsDBPath();
|
|
@@ -329,7 +329,7 @@ Status DBImpl::NewDB(std::vector<std::string>* new_filenames) {
|
|
|
329
329
|
}
|
|
330
330
|
FileTypeSet tmp_set = immutable_db_options_.checksum_handoff_file_types;
|
|
331
331
|
file->SetPreallocationBlockSize(
|
|
332
|
-
|
|
332
|
+
mutable_db_options_.manifest_preallocation_size);
|
|
333
333
|
std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
|
|
334
334
|
std::move(file), manifest, file_options, immutable_db_options_.clock,
|
|
335
335
|
io_tracer_, nullptr /* stats */,
|
|
@@ -783,9 +783,9 @@ Status DB::OpenAsSecondary(
|
|
|
783
783
|
handles->clear();
|
|
784
784
|
DBImplSecondary* impl = new DBImplSecondary(tmp_opts, dbname, secondary_path);
|
|
785
785
|
impl->versions_.reset(new ReactiveVersionSet(
|
|
786
|
-
dbname, &impl->immutable_db_options_, impl->
|
|
787
|
-
impl->table_cache_.get(),
|
|
788
|
-
&impl->write_controller_, impl->io_tracer_));
|
|
786
|
+
dbname, &impl->immutable_db_options_, impl->mutable_db_options_,
|
|
787
|
+
impl->file_options_, impl->table_cache_.get(),
|
|
788
|
+
impl->write_buffer_manager_, &impl->write_controller_, impl->io_tracer_));
|
|
789
789
|
impl->column_family_memtables_.reset(
|
|
790
790
|
new ColumnFamilyMemTablesImpl(impl->versions_->GetColumnFamilySet()));
|
|
791
791
|
impl->wal_in_db_path_ = impl->immutable_db_options_.IsWalDirSameAsDBPath();
|
|
@@ -1102,11 +1102,6 @@ Status DBImplSecondary::InitializeCompactionWorkspace(
|
|
|
1102
1102
|
return s;
|
|
1103
1103
|
}
|
|
1104
1104
|
|
|
1105
|
-
ROCKS_LOG_INFO(immutable_db_options_.info_log,
|
|
1106
|
-
"Initialized compaction workspace with %zu subcompaction "
|
|
1107
|
-
"progress to resume",
|
|
1108
|
-
compaction_progress_.size());
|
|
1109
|
-
|
|
1110
1105
|
return Status::OK();
|
|
1111
1106
|
}
|
|
1112
1107
|
|
|
@@ -1219,6 +1214,11 @@ Status DBImplSecondary::PrepareCompactionProgressState() {
|
|
|
1219
1214
|
return HandleInvalidOrNoCompactionProgress(compaction_progress_file_path,
|
|
1220
1215
|
scan_result);
|
|
1221
1216
|
}
|
|
1217
|
+
|
|
1218
|
+
ROCKS_LOG_DEBUG(
|
|
1219
|
+
immutable_db_options_.info_log,
|
|
1220
|
+
"Loaded compaction progress with %zu subcompaction(s) from %s",
|
|
1221
|
+
compaction_progress_.size(), compaction_progress_file_path.c_str());
|
|
1222
1222
|
return s;
|
|
1223
1223
|
} else {
|
|
1224
1224
|
return HandleInvalidOrNoCompactionProgress(
|
|
@@ -1299,24 +1299,33 @@ Status DBImplSecondary::CompactWithoutInstallation(
|
|
|
1299
1299
|
}
|
|
1300
1300
|
Status s;
|
|
1301
1301
|
|
|
1302
|
+
const auto& mutable_cf_options = cfd->GetLatestMutableCFOptions();
|
|
1303
|
+
|
|
1302
1304
|
// TODO(hx235): Resuming compaction is currently incompatible with
|
|
1303
|
-
//
|
|
1304
|
-
//
|
|
1305
|
-
//
|
|
1306
|
-
//
|
|
1307
|
-
// the
|
|
1308
|
-
//
|
|
1309
|
-
//
|
|
1310
|
-
//
|
|
1305
|
+
// output hash verification (enabled via paranoid_file_checks=true or
|
|
1306
|
+
// verify_output_flags containing kVerifyIteration) because resumed compaction
|
|
1307
|
+
// will lose the hash computed before interruption.
|
|
1308
|
+
// Potential solutions:
|
|
1309
|
+
// 1. Persist the hash state: Before interruption, save the current hash value
|
|
1310
|
+
// of each output file to disk, allowing validation to continue correctly
|
|
1311
|
+
// after resumption.
|
|
1312
|
+
// 2. Immediate verification: Move output verification to happen
|
|
1313
|
+
// immediately after each output file is created and closed, eliminating
|
|
1314
|
+
// the need to maintain hash state across resumption boundaries.
|
|
1315
|
+
bool output_hash_verification_enabled =
|
|
1316
|
+
mutable_cf_options.paranoid_file_checks ||
|
|
1317
|
+
!!(mutable_cf_options.verify_output_flags &
|
|
1318
|
+
VerifyOutputFlags::kVerifyIteration);
|
|
1319
|
+
|
|
1311
1320
|
bool allow_resumption =
|
|
1312
|
-
options.allow_resumption &&
|
|
1313
|
-
!cfd->GetLatestMutableCFOptions().paranoid_file_checks;
|
|
1321
|
+
options.allow_resumption && !output_hash_verification_enabled;
|
|
1314
1322
|
|
|
1315
|
-
if (options.allow_resumption &&
|
|
1316
|
-
cfd->GetLatestMutableCFOptions().paranoid_file_checks) {
|
|
1323
|
+
if (options.allow_resumption && output_hash_verification_enabled) {
|
|
1317
1324
|
ROCKS_LOG_WARN(immutable_db_options_.info_log,
|
|
1318
1325
|
"Resume compaction configured but disabled due to "
|
|
1319
|
-
"
|
|
1326
|
+
"incompatibility with output hash verification "
|
|
1327
|
+
"(paranoid_file_checks=true or verify_output_flags "
|
|
1328
|
+
"containing kVerifyIteration)");
|
|
1320
1329
|
}
|
|
1321
1330
|
|
|
1322
1331
|
mutex_.Unlock();
|
|
@@ -1345,8 +1354,8 @@ Status DBImplSecondary::CompactWithoutInstallation(
|
|
|
1345
1354
|
CompactionOptions comp_options;
|
|
1346
1355
|
comp_options.compression = kDisableCompressionOption;
|
|
1347
1356
|
comp_options.output_file_size_limit = MaxFileSizeForLevel(
|
|
1348
|
-
|
|
1349
|
-
|
|
1357
|
+
mutable_cf_options, input.output_level, cfd->ioptions().compaction_style,
|
|
1358
|
+
vstorage->base_level(),
|
|
1350
1359
|
cfd->ioptions().level_compaction_dynamic_level_bytes);
|
|
1351
1360
|
|
|
1352
1361
|
std::vector<CompactionInputFiles> input_files;
|
|
@@ -1384,8 +1393,8 @@ Status DBImplSecondary::CompactWithoutInstallation(
|
|
|
1384
1393
|
}
|
|
1385
1394
|
c.reset(cfd->compaction_picker()->PickCompactionForCompactFiles(
|
|
1386
1395
|
comp_options, input_files, input.output_level, vstorage,
|
|
1387
|
-
|
|
1388
|
-
|
|
1396
|
+
mutable_cf_options, mutable_db_options_, 0, earliest_snapshot,
|
|
1397
|
+
job_context.snapshot_checker));
|
|
1389
1398
|
assert(c != nullptr);
|
|
1390
1399
|
c->FinalizeInputInfo(version);
|
|
1391
1400
|
|
|
@@ -1731,6 +1740,11 @@ Status DBImplSecondary::FinalizeCompactionProgressWriter(
|
|
|
1731
1740
|
return HandleCompactionProgressWriterCreationFailure(
|
|
1732
1741
|
"" /* temp_file_path */, final_file_path, compaction_progress_writer);
|
|
1733
1742
|
}
|
|
1743
|
+
|
|
1744
|
+
ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
|
|
1745
|
+
"Finalized compaction progress writer onto %s",
|
|
1746
|
+
final_file_path.c_str());
|
|
1747
|
+
|
|
1734
1748
|
return Status::OK();
|
|
1735
1749
|
}
|
|
1736
1750
|
} // namespace ROCKSDB_NAMESPACE
|