@nxtedition/rocksdb 11.0.2 → 11.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +133 -122
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +15 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +11 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +17 -11
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +15 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +155 -0
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +564 -461
- package/deps/rocksdb/rocksdb/db/db_follower_test.cc +8 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +40 -24
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +8 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +19 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +20 -16
- package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +27 -0
- package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +10 -2
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +85 -0
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +55 -2
- package/deps/rocksdb/rocksdb/db/db_test2.cc +231 -0
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_test_util.h +10 -1
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +175 -1
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +64 -0
- package/deps/rocksdb/rocksdb/db/dbformat.h +5 -6
- package/deps/rocksdb/rocksdb/db/dbformat_test.cc +8 -8
- package/deps/rocksdb/rocksdb/db/experimental.cc +3 -2
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +2 -4
- package/deps/rocksdb/rocksdb/db/flush_job.cc +7 -2
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +4 -2
- package/deps/rocksdb/rocksdb/db/listener_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/log_writer.cc +12 -3
- package/deps/rocksdb/rocksdb/db/memtable.cc +83 -23
- package/deps/rocksdb/rocksdb/db/memtable.h +11 -3
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +7 -5
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +21 -0
- package/deps/rocksdb/rocksdb/db/version_builder.cc +462 -33
- package/deps/rocksdb/rocksdb/db/version_builder.h +70 -23
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +95 -207
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +54 -35
- package/deps/rocksdb/rocksdb/db/version_set.cc +13 -11
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +313 -59
- package/deps/rocksdb/rocksdb/db/write_batch.cc +124 -64
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +2 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +4 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +9 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +4 -32
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +7 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +60 -172
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +57 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +23 -15
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +2 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.cc +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +4 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +200 -92
- package/deps/rocksdb/rocksdb/env/file_system.cc +3 -3
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +124 -23
- package/deps/rocksdb/rocksdb/file/delete_scheduler.h +61 -8
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +141 -2
- package/deps/rocksdb/rocksdb/file/file_util.cc +17 -2
- package/deps/rocksdb/rocksdb/file/file_util.h +10 -0
- package/deps/rocksdb/rocksdb/file/filename.cc +11 -3
- package/deps/rocksdb/rocksdb/file/filename.h +2 -1
- package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +18 -0
- package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +27 -4
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +8 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +8 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +34 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +25 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +27 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +12 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +29 -1
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +102 -33
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +46 -3
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +4 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +6 -0
- package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
- package/deps/rocksdb/rocksdb/options/db_options.cc +15 -1
- package/deps/rocksdb/rocksdb/options/db_options.h +2 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +10 -0
- package/deps/rocksdb/rocksdb/options/options_parser.cc +3 -2
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +75 -35
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +4 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +8 -1
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +40 -15
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +98 -17
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +14 -2
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +21 -91
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +13 -21
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +14 -5
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +62 -53
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +60 -38
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +175 -78
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +65 -36
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +25 -15
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +13 -1
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +18 -4
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +4 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +11 -0
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +47 -18
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +1 -2
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +95 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +26 -15
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +62 -19
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +73 -34
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +5 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +10 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +2 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +8 -5
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +7 -4
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +225 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +17 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +5 -2
- package/index.js +5 -17
- package/iterator.js +9 -1
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
|
@@ -31,6 +31,7 @@ DeleteScheduler::DeleteScheduler(SystemClock* clock, FileSystem* fs,
|
|
|
31
31
|
total_trash_size_(0),
|
|
32
32
|
rate_bytes_per_sec_(rate_bytes_per_sec),
|
|
33
33
|
pending_files_(0),
|
|
34
|
+
next_trash_bucket_(0),
|
|
34
35
|
bytes_max_delete_chunk_(bytes_max_delete_chunk),
|
|
35
36
|
closing_(false),
|
|
36
37
|
cv_(&mu_),
|
|
@@ -66,10 +67,8 @@ Status DeleteScheduler::DeleteFile(const std::string& file_path,
|
|
|
66
67
|
total_trash_size_.load() > total_size * max_trash_db_ratio_.load())) {
|
|
67
68
|
// Rate limiting is disabled or trash size makes up more than
|
|
68
69
|
// max_trash_db_ratio_ (default 25%) of the total DB size
|
|
69
|
-
|
|
70
|
-
Status s = fs_->DeleteFile(file_path, IOOptions(), nullptr);
|
|
70
|
+
Status s = DeleteFileImmediately(file_path, /*accounted=*/true);
|
|
71
71
|
if (s.ok()) {
|
|
72
|
-
s = sst_file_manager_->OnDeleteFile(file_path);
|
|
73
72
|
ROCKS_LOG_INFO(info_log_,
|
|
74
73
|
"Deleted file %s immediately, rate_bytes_per_sec %" PRIi64
|
|
75
74
|
", total_trash_size %" PRIu64 ", total_size %" PRIi64
|
|
@@ -77,15 +76,57 @@ Status DeleteScheduler::DeleteFile(const std::string& file_path,
|
|
|
77
76
|
file_path.c_str(), rate_bytes_per_sec_.load(),
|
|
78
77
|
total_trash_size_.load(), total_size,
|
|
79
78
|
max_trash_db_ratio_.load());
|
|
80
|
-
InstrumentedMutexLock l(&mu_);
|
|
81
|
-
RecordTick(stats_.get(), FILES_DELETED_IMMEDIATELY);
|
|
82
79
|
}
|
|
83
80
|
return s;
|
|
84
81
|
}
|
|
82
|
+
return AddFileToDeletionQueue(file_path, dir_to_sync, /*bucket=*/std::nullopt,
|
|
83
|
+
/*accounted=*/true);
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
Status DeleteScheduler::DeleteUnaccountedFile(const std::string& file_path,
|
|
87
|
+
const std::string& dir_to_sync,
|
|
88
|
+
const bool force_bg,
|
|
89
|
+
std::optional<int32_t> bucket) {
|
|
90
|
+
uint64_t num_hard_links = 1;
|
|
91
|
+
fs_->NumFileLinks(file_path, IOOptions(), &num_hard_links, nullptr)
|
|
92
|
+
.PermitUncheckedError();
|
|
93
|
+
|
|
94
|
+
// We can tolerate rare races where we might immediately delete both links
|
|
95
|
+
// to a file.
|
|
96
|
+
if (rate_bytes_per_sec_.load() <= 0 || (!force_bg && num_hard_links > 1)) {
|
|
97
|
+
Status s = DeleteFileImmediately(file_path, /*accounted=*/false);
|
|
98
|
+
if (s.ok()) {
|
|
99
|
+
ROCKS_LOG_INFO(info_log_,
|
|
100
|
+
"Deleted file %s immediately, rate_bytes_per_sec %" PRIi64,
|
|
101
|
+
file_path.c_str(), rate_bytes_per_sec_.load());
|
|
102
|
+
}
|
|
103
|
+
return s;
|
|
104
|
+
}
|
|
105
|
+
return AddFileToDeletionQueue(file_path, dir_to_sync, bucket,
|
|
106
|
+
/*accounted=*/false);
|
|
107
|
+
}
|
|
85
108
|
|
|
109
|
+
Status DeleteScheduler::DeleteFileImmediately(const std::string& file_path,
|
|
110
|
+
bool accounted) {
|
|
111
|
+
TEST_SYNC_POINT("DeleteScheduler::DeleteFile");
|
|
112
|
+
TEST_SYNC_POINT_CALLBACK("DeleteScheduler::DeleteFile::cb",
|
|
113
|
+
const_cast<std::string*>(&file_path));
|
|
114
|
+
Status s = fs_->DeleteFile(file_path, IOOptions(), nullptr);
|
|
115
|
+
if (s.ok()) {
|
|
116
|
+
s = OnDeleteFile(file_path, accounted);
|
|
117
|
+
InstrumentedMutexLock l(&mu_);
|
|
118
|
+
RecordTick(stats_.get(), FILES_DELETED_IMMEDIATELY);
|
|
119
|
+
}
|
|
120
|
+
return s;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
Status DeleteScheduler::AddFileToDeletionQueue(const std::string& file_path,
|
|
124
|
+
const std::string& dir_to_sync,
|
|
125
|
+
std::optional<int32_t> bucket,
|
|
126
|
+
bool accounted) {
|
|
86
127
|
// Move file to trash
|
|
87
128
|
std::string trash_file;
|
|
88
|
-
Status s = MarkAsTrash(file_path, &trash_file);
|
|
129
|
+
Status s = MarkAsTrash(file_path, accounted, &trash_file);
|
|
89
130
|
ROCKS_LOG_INFO(info_log_, "Mark file: %s as trash -- %s", trash_file.c_str(),
|
|
90
131
|
s.ToString().c_str());
|
|
91
132
|
|
|
@@ -94,7 +135,7 @@ Status DeleteScheduler::DeleteFile(const std::string& file_path,
|
|
|
94
135
|
file_path.c_str(), s.ToString().c_str());
|
|
95
136
|
s = fs_->DeleteFile(file_path, IOOptions(), nullptr);
|
|
96
137
|
if (s.ok()) {
|
|
97
|
-
s =
|
|
138
|
+
s = OnDeleteFile(file_path, accounted);
|
|
98
139
|
ROCKS_LOG_INFO(info_log_, "Deleted file %s immediately",
|
|
99
140
|
trash_file.c_str());
|
|
100
141
|
InstrumentedMutexLock l(&mu_);
|
|
@@ -104,11 +145,13 @@ Status DeleteScheduler::DeleteFile(const std::string& file_path,
|
|
|
104
145
|
}
|
|
105
146
|
|
|
106
147
|
// Update the total trash size
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
148
|
+
if (accounted) {
|
|
149
|
+
uint64_t trash_file_size = 0;
|
|
150
|
+
IOStatus io_s =
|
|
151
|
+
fs_->GetFileSize(trash_file, IOOptions(), &trash_file_size, nullptr);
|
|
152
|
+
if (io_s.ok()) {
|
|
153
|
+
total_trash_size_.fetch_add(trash_file_size);
|
|
154
|
+
}
|
|
112
155
|
}
|
|
113
156
|
//**TODO: What should we do if we failed to
|
|
114
157
|
// get the file size?
|
|
@@ -117,8 +160,15 @@ Status DeleteScheduler::DeleteFile(const std::string& file_path,
|
|
|
117
160
|
{
|
|
118
161
|
InstrumentedMutexLock l(&mu_);
|
|
119
162
|
RecordTick(stats_.get(), FILES_MARKED_TRASH);
|
|
120
|
-
queue_.emplace(trash_file, dir_to_sync);
|
|
163
|
+
queue_.emplace(trash_file, dir_to_sync, accounted, bucket);
|
|
121
164
|
pending_files_++;
|
|
165
|
+
if (bucket.has_value()) {
|
|
166
|
+
auto iter = pending_files_in_buckets_.find(bucket.value());
|
|
167
|
+
assert(iter != pending_files_in_buckets_.end());
|
|
168
|
+
if (iter != pending_files_in_buckets_.end()) {
|
|
169
|
+
iter->second++;
|
|
170
|
+
}
|
|
171
|
+
}
|
|
122
172
|
if (pending_files_ == 1) {
|
|
123
173
|
cv_.SignalAll();
|
|
124
174
|
}
|
|
@@ -177,7 +227,7 @@ Status DeleteScheduler::CleanupDirectory(Env* env, SstFileManagerImpl* sfm,
|
|
|
177
227
|
}
|
|
178
228
|
|
|
179
229
|
Status DeleteScheduler::MarkAsTrash(const std::string& file_path,
|
|
180
|
-
std::string* trash_file) {
|
|
230
|
+
bool accounted, std::string* trash_file) {
|
|
181
231
|
// Sanity check of the path
|
|
182
232
|
size_t idx = file_path.rfind('/');
|
|
183
233
|
if (idx == std::string::npos || idx == file_path.size() - 1) {
|
|
@@ -211,7 +261,7 @@ Status DeleteScheduler::MarkAsTrash(const std::string& file_path,
|
|
|
211
261
|
}
|
|
212
262
|
cnt++;
|
|
213
263
|
}
|
|
214
|
-
if (s.ok()) {
|
|
264
|
+
if (s.ok() && accounted) {
|
|
215
265
|
s = sst_file_manager_->OnMoveFile(file_path, *trash_file);
|
|
216
266
|
}
|
|
217
267
|
return s;
|
|
@@ -235,6 +285,8 @@ void DeleteScheduler::BackgroundEmptyTrash() {
|
|
|
235
285
|
uint64_t total_deleted_bytes = 0;
|
|
236
286
|
int64_t current_delete_rate = rate_bytes_per_sec_.load();
|
|
237
287
|
while (!queue_.empty() && !closing_) {
|
|
288
|
+
// Satisfy static analysis.
|
|
289
|
+
std::optional<int32_t> bucket = std::nullopt;
|
|
238
290
|
if (current_delete_rate != rate_bytes_per_sec_.load()) {
|
|
239
291
|
// User changed the delete rate
|
|
240
292
|
current_delete_rate = rate_bytes_per_sec_.load();
|
|
@@ -247,14 +299,17 @@ void DeleteScheduler::BackgroundEmptyTrash() {
|
|
|
247
299
|
// Get new file to delete
|
|
248
300
|
const FileAndDir& fad = queue_.front();
|
|
249
301
|
std::string path_in_trash = fad.fname;
|
|
302
|
+
std::string dir_to_sync = fad.dir;
|
|
303
|
+
bool accounted = fad.accounted;
|
|
304
|
+
bucket = fad.bucket;
|
|
250
305
|
|
|
251
306
|
// We don't need to hold the lock while deleting the file
|
|
252
307
|
mu_.Unlock();
|
|
253
308
|
uint64_t deleted_bytes = 0;
|
|
254
309
|
bool is_complete = true;
|
|
255
310
|
// Delete file from trash and update total_penalty value
|
|
256
|
-
Status s =
|
|
257
|
-
|
|
311
|
+
Status s = DeleteTrashFile(path_in_trash, dir_to_sync, accounted,
|
|
312
|
+
&deleted_bytes, &is_complete);
|
|
258
313
|
total_deleted_bytes += deleted_bytes;
|
|
259
314
|
mu_.Lock();
|
|
260
315
|
if (is_complete) {
|
|
@@ -288,12 +343,20 @@ void DeleteScheduler::BackgroundEmptyTrash() {
|
|
|
288
343
|
TEST_SYNC_POINT_CALLBACK("DeleteScheduler::BackgroundEmptyTrash:Wait",
|
|
289
344
|
&total_penalty);
|
|
290
345
|
|
|
346
|
+
int32_t pending_files_in_bucket = std::numeric_limits<int32_t>::max();
|
|
291
347
|
if (is_complete) {
|
|
292
348
|
pending_files_--;
|
|
349
|
+
if (bucket.has_value()) {
|
|
350
|
+
auto iter = pending_files_in_buckets_.find(bucket.value());
|
|
351
|
+
assert(iter != pending_files_in_buckets_.end());
|
|
352
|
+
if (iter != pending_files_in_buckets_.end()) {
|
|
353
|
+
pending_files_in_bucket = iter->second--;
|
|
354
|
+
}
|
|
355
|
+
}
|
|
293
356
|
}
|
|
294
|
-
if (pending_files_ == 0) {
|
|
295
|
-
// Unblock WaitForEmptyTrash since there are
|
|
296
|
-
// to be deleted
|
|
357
|
+
if (pending_files_ == 0 || pending_files_in_bucket == 0) {
|
|
358
|
+
// Unblock WaitForEmptyTrash or WaitForEmptyTrashBucket since there are
|
|
359
|
+
// no more files waiting to be deleted
|
|
297
360
|
cv_.SignalAll();
|
|
298
361
|
}
|
|
299
362
|
}
|
|
@@ -302,12 +365,14 @@ void DeleteScheduler::BackgroundEmptyTrash() {
|
|
|
302
365
|
|
|
303
366
|
Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash,
|
|
304
367
|
const std::string& dir_to_sync,
|
|
305
|
-
uint64_t* deleted_bytes,
|
|
368
|
+
bool accounted, uint64_t* deleted_bytes,
|
|
306
369
|
bool* is_complete) {
|
|
307
370
|
uint64_t file_size;
|
|
308
371
|
Status s = fs_->GetFileSize(path_in_trash, IOOptions(), &file_size, nullptr);
|
|
309
372
|
*is_complete = true;
|
|
310
373
|
TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:DeleteFile");
|
|
374
|
+
TEST_SYNC_POINT_CALLBACK("DeleteScheduler::DeleteTrashFile::cb",
|
|
375
|
+
const_cast<std::string*>(&path_in_trash));
|
|
311
376
|
if (s.ok()) {
|
|
312
377
|
bool need_full_delete = true;
|
|
313
378
|
if (bytes_max_delete_chunk_ != 0 && file_size > bytes_max_delete_chunk_) {
|
|
@@ -374,7 +439,7 @@ Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash,
|
|
|
374
439
|
}
|
|
375
440
|
if (s.ok()) {
|
|
376
441
|
*deleted_bytes = file_size;
|
|
377
|
-
s =
|
|
442
|
+
s = OnDeleteFile(path_in_trash, accounted);
|
|
378
443
|
}
|
|
379
444
|
}
|
|
380
445
|
}
|
|
@@ -384,12 +449,24 @@ Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash,
|
|
|
384
449
|
path_in_trash.c_str(), s.ToString().c_str());
|
|
385
450
|
*deleted_bytes = 0;
|
|
386
451
|
} else {
|
|
387
|
-
|
|
452
|
+
if (accounted) {
|
|
453
|
+
total_trash_size_.fetch_sub(*deleted_bytes);
|
|
454
|
+
}
|
|
388
455
|
}
|
|
389
456
|
|
|
390
457
|
return s;
|
|
391
458
|
}
|
|
392
459
|
|
|
460
|
+
Status DeleteScheduler::OnDeleteFile(const std::string& file_path,
|
|
461
|
+
bool accounted) {
|
|
462
|
+
if (accounted) {
|
|
463
|
+
return sst_file_manager_->OnDeleteFile(file_path);
|
|
464
|
+
}
|
|
465
|
+
TEST_SYNC_POINT_CALLBACK("DeleteScheduler::OnDeleteFile",
|
|
466
|
+
const_cast<std::string*>(&file_path));
|
|
467
|
+
return Status::OK();
|
|
468
|
+
}
|
|
469
|
+
|
|
393
470
|
void DeleteScheduler::WaitForEmptyTrash() {
|
|
394
471
|
InstrumentedMutexLock l(&mu_);
|
|
395
472
|
while (pending_files_ > 0 && !closing_) {
|
|
@@ -397,6 +474,30 @@ void DeleteScheduler::WaitForEmptyTrash() {
|
|
|
397
474
|
}
|
|
398
475
|
}
|
|
399
476
|
|
|
477
|
+
std::optional<int32_t> DeleteScheduler::NewTrashBucket() {
|
|
478
|
+
if (rate_bytes_per_sec_.load() <= 0) {
|
|
479
|
+
return std::nullopt;
|
|
480
|
+
}
|
|
481
|
+
InstrumentedMutexLock l(&mu_);
|
|
482
|
+
int32_t bucket_number = next_trash_bucket_++;
|
|
483
|
+
pending_files_in_buckets_.emplace(bucket_number, 0);
|
|
484
|
+
return bucket_number;
|
|
485
|
+
}
|
|
486
|
+
|
|
487
|
+
void DeleteScheduler::WaitForEmptyTrashBucket(int32_t bucket) {
|
|
488
|
+
InstrumentedMutexLock l(&mu_);
|
|
489
|
+
if (bucket >= next_trash_bucket_) {
|
|
490
|
+
return;
|
|
491
|
+
}
|
|
492
|
+
auto iter = pending_files_in_buckets_.find(bucket);
|
|
493
|
+
while (iter != pending_files_in_buckets_.end() && iter->second > 0 &&
|
|
494
|
+
!closing_) {
|
|
495
|
+
cv_.Wait();
|
|
496
|
+
iter = pending_files_in_buckets_.find(bucket);
|
|
497
|
+
}
|
|
498
|
+
pending_files_in_buckets_.erase(bucket);
|
|
499
|
+
}
|
|
500
|
+
|
|
400
501
|
void DeleteScheduler::MaybeCreateBackgroundThread() {
|
|
401
502
|
if (bg_thread_ == nullptr && rate_bytes_per_sec_.load() > 0) {
|
|
402
503
|
bg_thread_.reset(
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
#include <map>
|
|
10
|
+
#include <optional>
|
|
10
11
|
#include <queue>
|
|
11
12
|
#include <string>
|
|
12
13
|
#include <thread>
|
|
@@ -48,16 +49,45 @@ class DeleteScheduler {
|
|
|
48
49
|
MaybeCreateBackgroundThread();
|
|
49
50
|
}
|
|
50
51
|
|
|
51
|
-
//
|
|
52
|
-
//
|
|
53
|
-
//
|
|
52
|
+
// Delete an accounted file that is tracked by `SstFileManager` and should be
|
|
53
|
+
// tracked by this `DeleteScheduler` when it's deleted.
|
|
54
|
+
// The file is deleted immediately if slow deletion is disabled. If force_bg
|
|
55
|
+
// is not set and trash to db size ratio exceeded the configured threshold,
|
|
56
|
+
// it is immediately deleted too. In all other cases, the file will be moved
|
|
57
|
+
// to a trash directory and scheduled for deletion by a background thread.
|
|
54
58
|
Status DeleteFile(const std::string& fname, const std::string& dir_to_sync,
|
|
55
59
|
const bool force_bg = false);
|
|
56
60
|
|
|
57
|
-
//
|
|
61
|
+
// Delete an unaccounted file that is not tracked by `SstFileManager` and
|
|
62
|
+
// should not be tracked by this `DeleteScheduler` when it's deleted.
|
|
63
|
+
// The file is deleted immediately if slow deletion is disabled. If force_bg
|
|
64
|
+
// is not set and the file has more than 1 hard link, it is immediately
|
|
65
|
+
// deleted too. In all other cases, the file will be moved to a trash
|
|
66
|
+
// directory and scheduled for deletion by a background thread.
|
|
67
|
+
// This API also supports assigning a file to a specified bucket created by
|
|
68
|
+
// `NewTrashBucket` when deleting files in the background. So the caller can
|
|
69
|
+
// wait for a specific bucket to be empty by calling the
|
|
70
|
+
// `WaitForEmptyTrashBucket` API.
|
|
71
|
+
Status DeleteUnaccountedFile(const std::string& file_path,
|
|
72
|
+
const std::string& dir_to_sync,
|
|
73
|
+
const bool force_bg = false,
|
|
74
|
+
std::optional<int32_t> bucket = std::nullopt);
|
|
75
|
+
|
|
76
|
+
// Wait for all files being deleted in the background to finish or for
|
|
58
77
|
// destructor to be called.
|
|
59
78
|
void WaitForEmptyTrash();
|
|
60
79
|
|
|
80
|
+
// Creates a new trash bucket. A bucket is only created and returned when slow
|
|
81
|
+
// deletion is enabled.
|
|
82
|
+
// For each bucket that is created, the user should also call
|
|
83
|
+
// `WaitForEmptyTrashBucket` after scheduling file deletions to make sure the
|
|
84
|
+
// trash files are all cleared.
|
|
85
|
+
std::optional<int32_t> NewTrashBucket();
|
|
86
|
+
|
|
87
|
+
// Wait for all the files in the specified bucket to be deleted in the
|
|
88
|
+
// background or for the destructor to be called.
|
|
89
|
+
void WaitForEmptyTrashBucket(int32_t bucket);
|
|
90
|
+
|
|
61
91
|
// Return a map containing errors that happened in BackgroundEmptyTrash
|
|
62
92
|
// file_path => error status
|
|
63
93
|
std::map<std::string, Status> GetBackgroundErrors();
|
|
@@ -87,12 +117,21 @@ class DeleteScheduler {
|
|
|
87
117
|
}
|
|
88
118
|
|
|
89
119
|
private:
|
|
90
|
-
Status
|
|
120
|
+
Status DeleteFileImmediately(const std::string& file_path, bool accounted);
|
|
121
|
+
|
|
122
|
+
Status AddFileToDeletionQueue(const std::string& file_path,
|
|
123
|
+
const std::string& dir_to_sync,
|
|
124
|
+
std::optional<int32_t> bucket, bool accounted);
|
|
125
|
+
|
|
126
|
+
Status MarkAsTrash(const std::string& file_path, bool accounted,
|
|
127
|
+
std::string* path_in_trash);
|
|
91
128
|
|
|
92
129
|
Status DeleteTrashFile(const std::string& path_in_trash,
|
|
93
|
-
const std::string& dir_to_sync,
|
|
130
|
+
const std::string& dir_to_sync, bool accounted,
|
|
94
131
|
uint64_t* deleted_bytes, bool* is_complete);
|
|
95
132
|
|
|
133
|
+
Status OnDeleteFile(const std::string& file_path, bool accounted);
|
|
134
|
+
|
|
96
135
|
void BackgroundEmptyTrash();
|
|
97
136
|
|
|
98
137
|
void MaybeCreateBackgroundThread();
|
|
@@ -104,19 +143,28 @@ class DeleteScheduler {
|
|
|
104
143
|
std::atomic<uint64_t> total_trash_size_;
|
|
105
144
|
// Maximum number of bytes that should be deleted per second
|
|
106
145
|
std::atomic<int64_t> rate_bytes_per_sec_;
|
|
107
|
-
// Mutex to protect queue_, pending_files_,
|
|
146
|
+
// Mutex to protect queue_, pending_files_, next_trash_bucket_,
|
|
147
|
+
// pending_files_in_buckets_, bg_errors_, closing_, stats_
|
|
108
148
|
InstrumentedMutex mu_;
|
|
109
149
|
|
|
110
150
|
struct FileAndDir {
|
|
111
|
-
FileAndDir(const std::string&
|
|
151
|
+
FileAndDir(const std::string& _fname, const std::string& _dir,
|
|
152
|
+
bool _accounted, std::optional<int32_t> _bucket)
|
|
153
|
+
: fname(_fname), dir(_dir), accounted(_accounted), bucket(_bucket) {}
|
|
112
154
|
std::string fname;
|
|
113
155
|
std::string dir; // empty will be skipped.
|
|
156
|
+
bool accounted;
|
|
157
|
+
std::optional<int32_t> bucket;
|
|
114
158
|
};
|
|
115
159
|
|
|
116
160
|
// Queue of trash files that need to be deleted
|
|
117
161
|
std::queue<FileAndDir> queue_;
|
|
118
162
|
// Number of trash files that are waiting to be deleted
|
|
119
163
|
int32_t pending_files_;
|
|
164
|
+
// Next trash bucket that can be created
|
|
165
|
+
int32_t next_trash_bucket_;
|
|
166
|
+
// A mapping from trash bucket to number of pending files in the bucket
|
|
167
|
+
std::map<int32_t, int32_t> pending_files_in_buckets_;
|
|
120
168
|
uint64_t bytes_max_delete_chunk_;
|
|
121
169
|
// Errors that happened in BackgroundEmptyTrash (file_path => error)
|
|
122
170
|
std::map<std::string, Status> bg_errors_;
|
|
@@ -127,6 +175,7 @@ class DeleteScheduler {
|
|
|
127
175
|
// Condition variable signaled in these conditions
|
|
128
176
|
// - pending_files_ value change from 0 => 1
|
|
129
177
|
// - pending_files_ value change from 1 => 0
|
|
178
|
+
// - a value in pending_files_in_buckets_ changes from 1 => 0
|
|
130
179
|
// - closing_ value is set to true
|
|
131
180
|
InstrumentedCondVar cv_;
|
|
132
181
|
// Background thread running BackgroundEmptyTrash
|
|
@@ -138,6 +187,10 @@ class DeleteScheduler {
|
|
|
138
187
|
// If the trash size constitutes more than this fraction of the total DB
|
|
139
188
|
// size we will start deleting new files passed to DeleteScheduler
|
|
140
189
|
// immediately
|
|
190
|
+
// Unaccounted files passed for deletion will not cause change in
|
|
191
|
+
// total_trash_size_ or affect the DeleteScheduler::total_trash_size_ over
|
|
192
|
+
// SstFileManager::total_size_ ratio. Their slow deletion is not subject to
|
|
193
|
+
// this configured threshold either.
|
|
141
194
|
std::atomic<double> max_trash_db_ratio_;
|
|
142
195
|
static const uint64_t kMicrosInSecond = 1000 * 1000LL;
|
|
143
196
|
std::shared_ptr<Statistics> stats_;
|
|
@@ -78,7 +78,7 @@ class DeleteSchedulerTest : public testing::Test {
|
|
|
78
78
|
}
|
|
79
79
|
|
|
80
80
|
std::string NewDummyFile(const std::string& file_name, uint64_t size = 1024,
|
|
81
|
-
size_t dummy_files_dirs_idx = 0) {
|
|
81
|
+
size_t dummy_files_dirs_idx = 0, bool track = true) {
|
|
82
82
|
std::string file_path =
|
|
83
83
|
dummy_files_dirs_[dummy_files_dirs_idx] + "/" + file_name;
|
|
84
84
|
std::unique_ptr<WritableFile> f;
|
|
@@ -86,7 +86,9 @@ class DeleteSchedulerTest : public testing::Test {
|
|
|
86
86
|
std::string data(size, 'A');
|
|
87
87
|
EXPECT_OK(f->Append(data));
|
|
88
88
|
EXPECT_OK(f->Close());
|
|
89
|
-
|
|
89
|
+
if (track) {
|
|
90
|
+
EXPECT_OK(sst_file_mgr_->OnAddFile(file_path));
|
|
91
|
+
}
|
|
90
92
|
return file_path;
|
|
91
93
|
}
|
|
92
94
|
|
|
@@ -353,6 +355,8 @@ TEST_F(DeleteSchedulerTest, DisableRateLimiting) {
|
|
|
353
355
|
ASSERT_EQ(num_files,
|
|
354
356
|
stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY));
|
|
355
357
|
|
|
358
|
+
ASSERT_FALSE(delete_scheduler_->NewTrashBucket().has_value());
|
|
359
|
+
|
|
356
360
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
|
|
357
361
|
}
|
|
358
362
|
|
|
@@ -718,6 +722,141 @@ TEST_F(DeleteSchedulerTest, IsTrashCheck) {
|
|
|
718
722
|
ASSERT_FALSE(DeleteScheduler::IsTrashFile("abc.trashx"));
|
|
719
723
|
}
|
|
720
724
|
|
|
725
|
+
TEST_F(DeleteSchedulerTest, DeleteAccountedAndUnaccountedFiles) {
|
|
726
|
+
rate_bytes_per_sec_ = 1024 * 1024; // 1 MB / s
|
|
727
|
+
NewDeleteScheduler();
|
|
728
|
+
|
|
729
|
+
// Create 100 files, every file is 1 KB
|
|
730
|
+
int num_files = 100; // 100 files
|
|
731
|
+
uint64_t file_size = 1024; // 1 KB as a file size
|
|
732
|
+
std::vector<std::string> generated_files;
|
|
733
|
+
for (int i = 0; i < num_files; i++) {
|
|
734
|
+
std::string file_name = "file" + std::to_string(i) + ".data";
|
|
735
|
+
generated_files.push_back(NewDummyFile(file_name, file_size,
|
|
736
|
+
/*dummy_files_dirs_idx*/ 0,
|
|
737
|
+
/*track=*/false));
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
for (int i = 0; i < num_files; i++) {
|
|
741
|
+
if (i % 2) {
|
|
742
|
+
ASSERT_OK(sst_file_mgr_->OnAddFile(generated_files[i], file_size));
|
|
743
|
+
ASSERT_OK(delete_scheduler_->DeleteFile(generated_files[i], ""));
|
|
744
|
+
} else {
|
|
745
|
+
ASSERT_OK(
|
|
746
|
+
delete_scheduler_->DeleteUnaccountedFile(generated_files[i], ""));
|
|
747
|
+
}
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
delete_scheduler_->WaitForEmptyTrash();
|
|
751
|
+
ASSERT_EQ(0, delete_scheduler_->GetTotalTrashSize());
|
|
752
|
+
ASSERT_EQ(0, sst_file_mgr_->GetTotalSize());
|
|
753
|
+
}
|
|
754
|
+
|
|
755
|
+
// Deletes 1000 unaccounted files from 10 threads, each thread using its own
// trash bucket, and verifies that all files were marked as trash and removed
// through the trash-file path (none deleted immediately), and that waiting on
// a bucket returns once it is empty.
TEST_F(DeleteSchedulerTest, ConcurrentlyDeleteUnaccountedFilesInBuckets) {
  // Counters bumped by the sync-point callbacks registered below.
  // NOTE(review): these are plain ints captured by reference; the test expects
  // fg_delete_file to stay 0, so concurrent increments are presumably never
  // exercised on the passing path - confirm before reusing this pattern.
  int bg_delete_file = 0;
  int fg_delete_file = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DeleteScheduler::DeleteTrashFile:DeleteFile",
      [&](void* /*arg*/) { bg_delete_file++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DeleteScheduler::DeleteFile", [&](void* /*arg*/) { fg_delete_file++; });
  rate_bytes_per_sec_ = 1024 * 1024;  // 1 MB / s
  NewDeleteScheduler();

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  // Create 1000 files, every file is 1 KB
  int num_files = 1000;
  uint64_t file_size = 1024;  // 1 KB as a file size
  std::vector<std::string> generated_files;
  for (int i = 0; i < num_files; i++) {
    std::string file_name = "file" + std::to_string(i) + ".data";
    generated_files.push_back(NewDummyFile(file_name, file_size,
                                           /*dummy_files_dirs_idx*/ 0,
                                           /*track=*/false));
  }

  // Concurrently delete files in different buckets and check all the buckets
  // are empty. thread_cnt * files_per_thread covers all 1000 files.
  int thread_cnt = 10;
  int files_per_thread = 100;
  std::atomic<int> thread_num(0);
  std::vector<port::Thread> threads;
  auto delete_thread = [&]() {
    std::optional<int32_t> bucket = delete_scheduler_->NewTrashBucket();
    ASSERT_TRUE(bucket.has_value());
    // Each thread claims a distinct, contiguous slice of generated_files.
    int idx = thread_num.fetch_add(1);
    int range_start = idx * files_per_thread;
    int range_end = range_start + files_per_thread;
    for (int j = range_start; j < range_end; j++) {
      ASSERT_OK(delete_scheduler_->DeleteUnaccountedFile(
          generated_files[j], "", /*force_bg=*/false, bucket));
    }
    delete_scheduler_->WaitForEmptyTrashBucket(bucket.value());
  };

  for (int i = 0; i < thread_cnt; i++) {
    threads.emplace_back(delete_thread);
  }

  for (auto& t : threads) {
    t.join();
  }

  ASSERT_EQ(0, delete_scheduler_->GetTotalTrashSize());
  ASSERT_EQ(0, stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY));
  ASSERT_EQ(1000, stats_->getAndResetTickerCount(FILES_MARKED_TRASH));
  ASSERT_EQ(0, fg_delete_file);
  ASSERT_EQ(1000, bg_delete_file);

  // Re-checking a bucket that is already empty is fine.
  delete_scheduler_->WaitForEmptyTrashBucket(9);
  // Waiting on an invalid bucket (100 was never handed out) returns as well.
  delete_scheduler_->WaitForEmptyTrashBucket(100);
  // The ten worker threads each took one bucket, so the next one is 10.
  std::optional<int32_t> next_bucket = delete_scheduler_->NewTrashBucket();
  ASSERT_TRUE(next_bucket.has_value());
  ASSERT_EQ(10, next_bucket.value());
  delete_scheduler_->WaitForEmptyTrashBucket(10);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
|
|
821
|
+
|
|
822
|
+
// Verifies that unaccounted files which still have another hard link are
// deleted immediately rather than being marked as trash: both deletions must
// hit the "DeleteScheduler::DeleteFile" sync point and none the trash-file
// one.
TEST_F(DeleteSchedulerTest,
       ImmediatelyDeleteUnaccountedFilesWithRemainingLinks) {
  // Counters bumped by the sync-point callbacks registered below.
  int bg_delete_file = 0;
  int fg_delete_file = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DeleteScheduler::DeleteTrashFile:DeleteFile",
      [&](void* /*arg*/) { bg_delete_file++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DeleteScheduler::DeleteFile", [&](void* /*arg*/) { fg_delete_file++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  rate_bytes_per_sec_ = 1024 * 1024;  // 1 MB / sec
  NewDeleteScheduler();

  std::string file1 = NewDummyFile("data_1", 500 * 1024,
                                   /*dummy_files_dirs_idx*/ 0, /*track=*/false);
  std::string file2 = NewDummyFile("data_2", 100 * 1024,
                                   /*dummy_files_dirs_idx*/ 0, /*track=*/false);

  // Give each file a second hard link so a link remains after deletion.
  ASSERT_OK(env_->LinkFile(file1, dummy_files_dirs_[0] + "/data_1b"));
  ASSERT_OK(env_->LinkFile(file2, dummy_files_dirs_[0] + "/data_2b"));

  // Should delete in 4 batches if there were no hard links; with the links
  // in place both files are expected to be deleted immediately instead.
  ASSERT_OK(
      delete_scheduler_->DeleteUnaccountedFile(file1, "", /*force_bg=*/false));
  ASSERT_OK(
      delete_scheduler_->DeleteUnaccountedFile(file2, "", /*force_bg=*/false));

  delete_scheduler_->WaitForEmptyTrash();

  ASSERT_EQ(0, delete_scheduler_->GetTotalTrashSize());
  ASSERT_EQ(0, bg_delete_file);
  ASSERT_EQ(2, fg_delete_file);
  ASSERT_EQ(0, stats_->getAndResetTickerCount(FILES_MARKED_TRASH));
  ASSERT_EQ(2, stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY));
  // Turn sync-point processing off again before the by-reference counters
  // captured by the callbacks go out of scope (matches the sibling test).
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
|
|
859
|
+
|
|
721
860
|
} // namespace ROCKSDB_NAMESPACE
|
|
722
861
|
|
|
723
862
|
int main(int argc, char** argv) {
|
|
@@ -125,8 +125,8 @@ IOStatus CreateFile(FileSystem* fs, const std::string& destination,
|
|
|
125
125
|
Status DeleteDBFile(const ImmutableDBOptions* db_options,
|
|
126
126
|
const std::string& fname, const std::string& dir_to_sync,
|
|
127
127
|
const bool force_bg, const bool force_fg) {
|
|
128
|
-
SstFileManagerImpl* sfm =
|
|
129
|
-
|
|
128
|
+
SstFileManagerImpl* sfm = static_cast_with_check<SstFileManagerImpl>(
|
|
129
|
+
db_options->sst_file_manager.get());
|
|
130
130
|
if (sfm && !force_fg) {
|
|
131
131
|
return sfm->ScheduleFileDeletion(fname, dir_to_sync, force_bg);
|
|
132
132
|
} else {
|
|
@@ -134,6 +134,21 @@ Status DeleteDBFile(const ImmutableDBOptions* db_options,
|
|
|
134
134
|
}
|
|
135
135
|
}
|
|
136
136
|
|
|
137
|
+
// Deletes `fname`, a DB file that is not accounted for by the SstFileManager.
// If db_options->sst_file_manager is set and `force_fg` is false, the request
// is forwarded to SstFileManagerImpl::ScheduleUnaccountedFileDeletion along
// with `dir_to_sync`, `force_bg` and `bucket`. Otherwise the file is removed
// directly through db_options->env. Returns the Status of whichever path ran.
Status DeleteUnaccountedDBFile(const ImmutableDBOptions* db_options,
                               const std::string& fname,
                               const std::string& dir_to_sync,
                               const bool force_bg, const bool force_fg,
                               std::optional<int32_t> bucket) {
  SstFileManagerImpl* const file_manager =
      static_cast_with_check<SstFileManagerImpl>(
          db_options->sst_file_manager.get());

  // No manager configured, or the caller insists on a foreground delete.
  const bool delete_directly = force_fg || file_manager == nullptr;
  if (delete_directly) {
    return db_options->env->DeleteFile(fname);
  }

  return file_manager->ScheduleUnaccountedFileDeletion(fname, dir_to_sync,
                                                       force_bg, bucket);
}
|
|
151
|
+
|
|
137
152
|
// requested_checksum_func_name brings the function name of the checksum
|
|
138
153
|
// generator in checksum_factory. Empty string is permitted, in which case the
|
|
139
154
|
// name of the generator created by the factory is unchecked. When
|
|
@@ -55,6 +55,16 @@ Status DeleteDBFile(const ImmutableDBOptions* db_options,
|
|
|
55
55
|
const std::string& fname, const std::string& path_to_sync,
|
|
56
56
|
const bool force_bg, const bool force_fg);
|
|
57
57
|
|
|
58
|
+
// Delete an unaccounted DB file that is not tracked by SstFileManager and will
|
|
59
|
+
// not be tracked by its DeleteScheduler when getting deleted.
|
|
60
|
+
// If a legitimate bucket is provided and this file is scheduled for slow
|
|
61
|
+
// deletion, it will be assigned to the specified trash bucket.
|
|
62
|
+
Status DeleteUnaccountedDBFile(const ImmutableDBOptions* db_options,
|
|
63
|
+
const std::string& fname,
|
|
64
|
+
const std::string& dir_to_sync,
|
|
65
|
+
const bool force_bg, const bool force_fg,
|
|
66
|
+
std::optional<int32_t> bucket);
|
|
67
|
+
|
|
58
68
|
// TODO(hx235): pass the whole DBOptions instead of its individual fields
|
|
59
69
|
IOStatus GenerateOneFileChecksum(
|
|
60
70
|
FileSystem* fs, const std::string& file_path,
|
|
@@ -388,6 +388,7 @@ bool ParseFileName(const std::string& fname, uint64_t* number,
|
|
|
388
388
|
|
|
389
389
|
IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs,
|
|
390
390
|
const std::string& dbname, uint64_t descriptor_number,
|
|
391
|
+
Temperature temp,
|
|
391
392
|
FSDirectory* dir_contains_current_file) {
|
|
392
393
|
// Remove leading "dbname/" and add newline to manifest file name
|
|
393
394
|
std::string manifest = DescriptorFileName(dbname, descriptor_number);
|
|
@@ -397,8 +398,11 @@ IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs,
|
|
|
397
398
|
std::string tmp = TempFileName(dbname, descriptor_number);
|
|
398
399
|
IOOptions opts;
|
|
399
400
|
IOStatus s = PrepareIOFromWriteOptions(write_options, opts);
|
|
401
|
+
FileOptions file_opts;
|
|
402
|
+
file_opts.temperature = temp;
|
|
400
403
|
if (s.ok()) {
|
|
401
|
-
s = WriteStringToFile(fs, contents.ToString() + "\n", tmp, true, opts
|
|
404
|
+
s = WriteStringToFile(fs, contents.ToString() + "\n", tmp, true, opts,
|
|
405
|
+
file_opts);
|
|
402
406
|
}
|
|
403
407
|
TEST_SYNC_POINT_CALLBACK("SetCurrentFile:BeforeRename", &s);
|
|
404
408
|
if (s.ok()) {
|
|
@@ -423,7 +427,8 @@ IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs,
|
|
|
423
427
|
}
|
|
424
428
|
|
|
425
429
|
Status SetIdentityFile(const WriteOptions& write_options, Env* env,
|
|
426
|
-
const std::string& dbname,
|
|
430
|
+
const std::string& dbname, Temperature temp,
|
|
431
|
+
const std::string& db_id) {
|
|
427
432
|
std::string id;
|
|
428
433
|
if (db_id.empty()) {
|
|
429
434
|
id = env->GenerateUniqueId();
|
|
@@ -437,8 +442,11 @@ Status SetIdentityFile(const WriteOptions& write_options, Env* env,
|
|
|
437
442
|
Status s;
|
|
438
443
|
IOOptions opts;
|
|
439
444
|
s = PrepareIOFromWriteOptions(write_options, opts);
|
|
445
|
+
FileOptions file_opts;
|
|
446
|
+
file_opts.temperature = temp;
|
|
440
447
|
if (s.ok()) {
|
|
441
|
-
s = WriteStringToFile(env, id, tmp,
|
|
448
|
+
s = WriteStringToFile(env->GetFileSystem().get(), id, tmp,
|
|
449
|
+
/*should_sync=*/true, opts, file_opts);
|
|
442
450
|
}
|
|
443
451
|
if (s.ok()) {
|
|
444
452
|
s = env->RenameFile(tmp, identify_file_name);
|