@nxtedition/rocksdb 7.0.0-alpha.6 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +37 -36
- package/deps/rocksdb/rocksdb/CMakeLists.txt +10 -3
- package/deps/rocksdb/rocksdb/Makefile +8 -1
- package/deps/rocksdb/rocksdb/TARGETS +14 -0
- package/deps/rocksdb/rocksdb/cache/cache.cc +50 -2
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +9 -3
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +111 -33
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +71 -31
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +31 -30
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +21 -8
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +35 -38
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +22 -9
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +48 -0
- package/deps/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc +78 -0
- package/deps/rocksdb/rocksdb/db/builder.cc +7 -5
- package/deps/rocksdb/rocksdb/db/c.cc +777 -108
- package/deps/rocksdb/rocksdb/db/c_test.c +290 -30
- package/deps/rocksdb/rocksdb/db/column_family.cc +13 -0
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +24 -36
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +18 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +24 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +6 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +38 -40
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +14 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +3 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +253 -24
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +9 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +3 -2
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +67 -10
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +83 -7
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +5 -2
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +68 -0
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +40 -1
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +94 -23
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +17 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +263 -58
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +186 -23
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +43 -14
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +24 -28
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +116 -83
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +13 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +71 -34
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +72 -33
- package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +629 -0
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +438 -10
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +43 -2
- package/deps/rocksdb/rocksdb/db/db_test.cc +41 -1
- package/deps/rocksdb/rocksdb/db/db_test2.cc +41 -12
- package/deps/rocksdb/rocksdb/db/db_test_util.h +1 -0
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +90 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +109 -16
- package/deps/rocksdb/rocksdb/db/dbformat.h +1 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +54 -0
- package/deps/rocksdb/rocksdb/db/flush_job.cc +3 -3
- package/deps/rocksdb/rocksdb/db/log_reader.cc +22 -4
- package/deps/rocksdb/rocksdb/db/log_reader.h +4 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +4 -0
- package/deps/rocksdb/rocksdb/db/post_memtable_callback.h +25 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +1 -1
- package/deps/rocksdb/rocksdb/db/repair_test.cc +3 -2
- package/deps/rocksdb/rocksdb/db/snapshot_impl.h +65 -2
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +3 -2
- package/deps/rocksdb/rocksdb/db/version_set.cc +52 -0
- package/deps/rocksdb/rocksdb/db/version_set.h +57 -43
- package/deps/rocksdb/rocksdb/db/wal_manager.cc +14 -4
- package/deps/rocksdb/rocksdb/db/wal_manager.h +16 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +141 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +55 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +292 -0
- package/deps/rocksdb/rocksdb/db/write_thread.h +6 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +42 -19
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +28 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +6 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +11 -5
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +18 -12
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +74 -167
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +4 -9
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +16 -9
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +117 -10
- package/deps/rocksdb/rocksdb/env/composite_env.cc +7 -0
- package/deps/rocksdb/rocksdb/env/env.cc +4 -0
- package/deps/rocksdb/rocksdb/env/env_posix.cc +3 -3
- package/deps/rocksdb/rocksdb/env/env_test.cc +5 -5
- package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +45 -0
- package/deps/rocksdb/rocksdb/env/file_system_tracer.h +14 -0
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +1 -1
- package/deps/rocksdb/rocksdb/env/io_posix.cc +50 -24
- package/deps/rocksdb/rocksdb/env/io_posix.h +9 -7
- package/deps/rocksdb/rocksdb/env/mock_env.cc +9 -3
- package/deps/rocksdb/rocksdb/file/file_util.cc +4 -1
- package/deps/rocksdb/rocksdb/file/filename.cc +14 -0
- package/deps/rocksdb/rocksdb/file/line_file_reader.cc +9 -4
- package/deps/rocksdb/rocksdb/file/line_file_reader.h +3 -2
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +157 -0
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +8 -1
- package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +68 -32
- package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +20 -6
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +10 -6
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +4 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +16 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +231 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +4 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +13 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/io_status.h +4 -20
- package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +31 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/snapshot.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +4 -20
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/trace_record.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +34 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +36 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +74 -0
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +36 -3
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +16 -3
- package/deps/rocksdb/rocksdb/logging/env_logger.h +3 -3
- package/deps/rocksdb/rocksdb/logging/log_buffer.cc +2 -2
- package/deps/rocksdb/rocksdb/logging/log_buffer.h +1 -1
- package/deps/rocksdb/rocksdb/logging/posix_logger.h +3 -3
- package/deps/rocksdb/rocksdb/memory/arena.cc +0 -1
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +61 -73
- package/deps/rocksdb/rocksdb/monitoring/histogram.cc +6 -5
- package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +6 -0
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +7 -3
- package/deps/rocksdb/rocksdb/options/cf_options.cc +6 -0
- package/deps/rocksdb/rocksdb/options/cf_options.h +3 -0
- package/deps/rocksdb/rocksdb/options/options.cc +4 -1
- package/deps/rocksdb/rocksdb/options/options_helper.cc +1 -0
- package/deps/rocksdb/rocksdb/options/options_parser.cc +1 -1
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +1 -0
- package/deps/rocksdb/rocksdb/options/options_test.cc +4 -0
- package/deps/rocksdb/rocksdb/port/port_posix.h +0 -2
- package/deps/rocksdb/rocksdb/port/sys_time.h +27 -11
- package/deps/rocksdb/rocksdb/port/win/env_win.cc +1 -1
- package/deps/rocksdb/rocksdb/port/win/io_win.cc +16 -0
- package/deps/rocksdb/rocksdb/port/win/io_win.h +11 -2
- package/deps/rocksdb/rocksdb/port/win/port_win.cc +1 -1
- package/deps/rocksdb/rocksdb/port/win/port_win.h +2 -16
- package/deps/rocksdb/rocksdb/port/win/win_jemalloc.cc +2 -2
- package/deps/rocksdb/rocksdb/port/win/win_logger.cc +2 -2
- package/deps/rocksdb/rocksdb/rocksdb.pc.in +4 -5
- package/deps/rocksdb/rocksdb/src.mk +3 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +7 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +39 -43
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +2 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +42 -34
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +1 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +2 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +2 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_prefix_index.cc +7 -13
- package/deps/rocksdb/rocksdb/table/block_based/block_prefix_index.h +9 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_type.h +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +4 -4
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +6 -2
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +8 -5
- package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +2 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +19 -14
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +2 -0
- package/deps/rocksdb/rocksdb/table/format.h +1 -3
- package/deps/rocksdb/rocksdb/table/get_context.cc +5 -0
- package/deps/rocksdb/rocksdb/table/multiget_context.h +3 -0
- package/deps/rocksdb/rocksdb/table/scoped_arena_iterator.h +3 -4
- package/deps/rocksdb/rocksdb/table/table_test.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +102 -6
- package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +19 -2
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +2 -1
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +2 -1
- package/deps/rocksdb/rocksdb/util/aligned_buffer.h +2 -4
- package/deps/rocksdb/rocksdb/util/autovector.h +11 -1
- package/deps/rocksdb/rocksdb/util/cleanable.cc +1 -0
- package/deps/rocksdb/rocksdb/util/compression.h +5 -7
- package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +14 -8
- package/deps/rocksdb/rocksdb/util/string_util.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +33 -63
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +3 -2
- package/deps/rocksdb/rocksdb/utilities/counted_fs.cc +14 -0
- package/deps/rocksdb/rocksdb/utilities/counted_fs.h +7 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +7 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +1 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +8 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +3 -0
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_bench.cc +6 -4
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.h +2 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +34 -21
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +31 -7
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +1 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +63 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +40 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +426 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +37 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +6 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +16 -18
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +18 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +61 -0
- package/deps/rocksdb/rocksdb.gyp +1 -0
- package/index.js +5 -2
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/darwin-x64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
|
@@ -32,6 +32,7 @@
|
|
|
32
32
|
#include "db/log_writer.h"
|
|
33
33
|
#include "db/logs_with_prep_tracker.h"
|
|
34
34
|
#include "db/memtable_list.h"
|
|
35
|
+
#include "db/post_memtable_callback.h"
|
|
35
36
|
#include "db/pre_release_callback.h"
|
|
36
37
|
#include "db/range_del_aggregator.h"
|
|
37
38
|
#include "db/read_callback.h"
|
|
@@ -113,6 +114,68 @@ class Directories {
|
|
|
113
114
|
|
|
114
115
|
// Returns a non-owning raw pointer to the directory object held by db_dir_
// (nullptr when db_dir_ is empty). Ownership stays with this object.
FSDirectory* GetDbDir() { return db_dir_.get(); }
|
|
115
116
|
|
|
117
|
+
IOStatus Close(const IOOptions& options, IODebugContext* dbg) {
  // Closes every directory held by this object, in order: db_dir_, wal_dir_,
  // then each entry of data_dirs_.
  //
  // Returns the first error that is not "NotSupported"; directories after the
  // failing one are not closed. Returns IOStatus::OK() when every Close()
  // either succeeded or reported "NotSupported".
  IOStatus s = IOStatus::OK();
  IOStatus temp_s = IOStatus::OK();

  // The default implementation for Close() in Directory/FSDirectory class
  // returns a "NotSupported" status. The upper level interface should be able
  // to handle this error so that Close() does not fail after upgrading when
  // run on FileSystems that have not implemented `Directory::Close()` or
  // `FSDirectory::Close()` yet.

  if (db_dir_) {
    temp_s = db_dir_->Close(options, dbg);
    if (!temp_s.ok()) {
      if (temp_s.IsNotSupported()) {
        temp_s.PermitUncheckedError();
      } else {
        s = temp_s;
      }
    }
  }

  if (!s.ok()) {
    return s;
  }

  if (wal_dir_) {
    // Capture the result in temp_s (not s) so that the NotSupported tolerance
    // below inspects the WAL directory's own status. Assigning to s here made
    // the check look at the stale db_dir_ result and caused a NotSupported
    // from the WAL directory to be returned as a failure.
    temp_s = wal_dir_->Close(options, dbg);
    if (!temp_s.ok()) {
      if (temp_s.IsNotSupported()) {
        temp_s.PermitUncheckedError();
      } else {
        s = temp_s;
      }
    }
  }

  if (!s.ok()) {
    return s;
  }

  if (data_dirs_.size() > 0 && s.ok()) {
    for (auto& data_dir_ptr : data_dirs_) {
      if (data_dir_ptr) {
        temp_s = data_dir_ptr->Close(options, dbg);
        if (!temp_s.ok()) {
          if (temp_s.IsNotSupported()) {
            temp_s.PermitUncheckedError();
          } else {
            return temp_s;
          }
        }
      }
    }
  }

  // Mark temp_s as checked when temp_s is still the initial status
  // (IOStatus::OK(), not checked yet)
  temp_s.PermitUncheckedError();
  return s;
}
|
|
178
|
+
|
|
116
179
|
private:
|
|
117
180
|
std::unique_ptr<FSDirectory> db_dir_;
|
|
118
181
|
std::vector<std::unique_ptr<FSDirectory>> data_dirs_;
|
|
@@ -283,6 +346,19 @@ class DBImpl : public DB {
|
|
|
283
346
|
|
|
284
347
|
virtual const Snapshot* GetSnapshot() override;
|
|
285
348
|
virtual void ReleaseSnapshot(const Snapshot* snapshot) override;
|
|
349
|
+
// Create a timestamped snapshot. This snapshot can be shared by multiple
|
|
350
|
+
// readers. If any of them uses it for write conflict checking, then
|
|
351
|
+
// is_write_conflict_boundary is true. For simplicity, set it to true by
|
|
352
|
+
// default.
|
|
353
|
+
std::pair<Status, std::shared_ptr<const Snapshot>> CreateTimestampedSnapshot(
|
|
354
|
+
SequenceNumber snapshot_seq, uint64_t ts);
|
|
355
|
+
std::shared_ptr<const SnapshotImpl> GetTimestampedSnapshot(uint64_t ts) const;
|
|
356
|
+
void ReleaseTimestampedSnapshotsOlderThan(
|
|
357
|
+
uint64_t ts, size_t* remaining_total_ss = nullptr);
|
|
358
|
+
Status GetTimestampedSnapshots(uint64_t ts_lb, uint64_t ts_ub,
|
|
359
|
+
std::vector<std::shared_ptr<const Snapshot>>&
|
|
360
|
+
timestamped_snapshots) const;
|
|
361
|
+
|
|
286
362
|
using DB::GetProperty;
|
|
287
363
|
virtual bool GetProperty(ColumnFamilyHandle* column_family,
|
|
288
364
|
const Slice& property, std::string* value) override;
|
|
@@ -1160,6 +1236,8 @@ class DBImpl : public DB {
|
|
|
1160
1236
|
static void TEST_ResetDbSessionIdGen();
|
|
1161
1237
|
static std::string GenerateDbSessionId(Env* env);
|
|
1162
1238
|
|
|
1239
|
+
// Read-only accessor: returns the current value of seq_per_batch_.
bool seq_per_batch() const { return seq_per_batch_; }
|
|
1240
|
+
|
|
1163
1241
|
protected:
|
|
1164
1242
|
const std::string dbname_;
|
|
1165
1243
|
// TODO(peterd): unify with VersionSet::db_id_
|
|
@@ -1183,6 +1261,9 @@ class DBImpl : public DB {
|
|
|
1183
1261
|
InstrumentedMutex trace_mutex_;
|
|
1184
1262
|
BlockCacheTracer block_cache_tracer_;
|
|
1185
1263
|
|
|
1264
|
+
// constant false canceled flag, used when the compaction is not manual
|
|
1265
|
+
const std::atomic<bool> kManualCompactionCanceledFalse_{false};
|
|
1266
|
+
|
|
1186
1267
|
// State below is protected by mutex_
|
|
1187
1268
|
// With two_write_queues enabled, some of the variables that accessed during
|
|
1188
1269
|
// WriteToWAL need different synchronization: log_empty_, alive_log_files_,
|
|
@@ -1207,9 +1288,6 @@ class DBImpl : public DB {
|
|
|
1207
1288
|
// only used for dynamically adjusting max_total_wal_size. it is a sum of
|
|
1208
1289
|
// [write_buffer_size * max_write_buffer_number] over all column families
|
|
1209
1290
|
uint64_t max_total_in_memory_state_;
|
|
1210
|
-
// If true, we have only one (default) column family. We use this to optimize
|
|
1211
|
-
// some code-paths
|
|
1212
|
-
bool single_column_family_mode_;
|
|
1213
1291
|
|
|
1214
1292
|
// The options to access storage files
|
|
1215
1293
|
const FileOptions file_options_;
|
|
@@ -1240,6 +1318,39 @@ class DBImpl : public DB {
|
|
|
1240
1318
|
|
|
1241
1319
|
std::atomic<bool> shutting_down_;
|
|
1242
1320
|
|
|
1321
|
+
// RecoveryContext struct stores the context about version edits along
|
|
1322
|
+
// with corresponding column_family_data and column_family_options.
|
|
1323
|
+
class RecoveryContext {
|
|
1324
|
+
public:
|
|
1325
|
+
~RecoveryContext() {
|
|
1326
|
+
for (auto& edit_list : edit_lists_) {
|
|
1327
|
+
for (auto* edit : edit_list) {
|
|
1328
|
+
delete edit;
|
|
1329
|
+
}
|
|
1330
|
+
}
|
|
1331
|
+
}
|
|
1332
|
+
|
|
1333
|
+
void UpdateVersionEdits(ColumnFamilyData* cfd, const VersionEdit& edit) {
|
|
1334
|
+
assert(cfd != nullptr);
|
|
1335
|
+
if (map_.find(cfd->GetID()) == map_.end()) {
|
|
1336
|
+
uint32_t size = static_cast<uint32_t>(map_.size());
|
|
1337
|
+
map_.emplace(cfd->GetID(), size);
|
|
1338
|
+
cfds_.emplace_back(cfd);
|
|
1339
|
+
mutable_cf_opts_.emplace_back(cfd->GetLatestMutableCFOptions());
|
|
1340
|
+
edit_lists_.emplace_back(autovector<VersionEdit*>());
|
|
1341
|
+
}
|
|
1342
|
+
uint32_t i = map_[cfd->GetID()];
|
|
1343
|
+
edit_lists_[i].emplace_back(new VersionEdit(edit));
|
|
1344
|
+
}
|
|
1345
|
+
|
|
1346
|
+
std::unordered_map<uint32_t, uint32_t> map_; // cf_id to index;
|
|
1347
|
+
autovector<ColumnFamilyData*> cfds_;
|
|
1348
|
+
autovector<const MutableCFOptions*> mutable_cf_opts_;
|
|
1349
|
+
autovector<autovector<VersionEdit*>> edit_lists_;
|
|
1350
|
+
// files_to_delete_ contains sst files
|
|
1351
|
+
std::unordered_set<std::string> files_to_delete_;
|
|
1352
|
+
};
|
|
1353
|
+
|
|
1243
1354
|
// Except in DB::Open(), WriteOptionsFile can only be called when:
|
|
1244
1355
|
// Persist options to options file.
|
|
1245
1356
|
// If need_mutex_lock = false, the method will lock DB mutex.
|
|
@@ -1309,7 +1420,8 @@ class DBImpl : public DB {
|
|
|
1309
1420
|
uint64_t* log_used = nullptr, uint64_t log_ref = 0,
|
|
1310
1421
|
bool disable_memtable = false, uint64_t* seq_used = nullptr,
|
|
1311
1422
|
size_t batch_cnt = 0,
|
|
1312
|
-
PreReleaseCallback* pre_release_callback = nullptr
|
|
1423
|
+
PreReleaseCallback* pre_release_callback = nullptr,
|
|
1424
|
+
PostMemTableCallback* post_memtable_callback = nullptr);
|
|
1313
1425
|
|
|
1314
1426
|
Status PipelinedWriteImpl(const WriteOptions& options, WriteBatch* updates,
|
|
1315
1427
|
WriteCallback* callback = nullptr,
|
|
@@ -1356,16 +1468,19 @@ class DBImpl : public DB {
|
|
|
1356
1468
|
// be made to the descriptor are added to *edit.
|
|
1357
1469
|
// recovered_seq is set to less than kMaxSequenceNumber if the log's tail is
|
|
1358
1470
|
// skipped.
|
|
1471
|
+
// recovery_ctx stores the context about version edits and all those
|
|
1472
|
+
// edits are persisted to new Manifest after successfully syncing the new WAL.
|
|
1359
1473
|
virtual Status Recover(
|
|
1360
1474
|
const std::vector<ColumnFamilyDescriptor>& column_families,
|
|
1361
1475
|
bool read_only = false, bool error_if_wal_file_exists = false,
|
|
1362
1476
|
bool error_if_data_exists_in_wals = false,
|
|
1363
|
-
uint64_t* recovered_seq = nullptr
|
|
1477
|
+
uint64_t* recovered_seq = nullptr,
|
|
1478
|
+
RecoveryContext* recovery_ctx = nullptr);
|
|
1364
1479
|
|
|
1365
1480
|
// Base implementation unconditionally returns true. Declared virtual, so a
// derived class may override it.
virtual bool OwnTablesAndLogs() const { return true; }
|
|
1366
1481
|
|
|
1367
1482
|
// Set DB identity file, and write DB ID to manifest if necessary.
|
|
1368
|
-
Status SetDBId(bool read_only);
|
|
1483
|
+
Status SetDBId(bool read_only, RecoveryContext* recovery_ctx);
|
|
1369
1484
|
|
|
1370
1485
|
// REQUIRES: db mutex held when calling this function, but the db mutex can
|
|
1371
1486
|
// be released and re-acquired. Db mutex will be held when the function
|
|
@@ -1374,20 +1489,31 @@ class DBImpl : public DB {
|
|
|
1374
1489
|
// not referenced in the MANIFEST (e.g.
|
|
1375
1490
|
// 1. It's best effort recovery;
|
|
1376
1491
|
// 2. The VersionEdits referencing the SST files are appended to
|
|
1377
|
-
//
|
|
1492
|
+
// RecoveryContext, DB crashes when syncing the MANIFEST, the VersionEdits are
|
|
1378
1493
|
// still not synced to MANIFEST during recovery.)
|
|
1379
|
-
//
|
|
1494
|
+
// It stores the SST files to be deleted in RecoveryContext. In the
|
|
1380
1495
|
// meantime, we find out the largest file number present in the paths, and
|
|
1381
1496
|
// bump up the version set's next_file_number_ to be 1 + largest_file_number.
|
|
1382
|
-
|
|
1497
|
+
// recovery_ctx stores the context about version edits and files to be
|
|
1498
|
+
// deleted. All those edits are persisted to new Manifest after successfully
|
|
1499
|
+
// syncing the new WAL.
|
|
1500
|
+
Status DeleteUnreferencedSstFiles(RecoveryContext* recovery_ctx);
|
|
1383
1501
|
|
|
1384
1502
|
// SetDbSessionId() should be called in the constructor DBImpl()
|
|
1385
1503
|
// to ensure that db_session_id_ gets updated every time the DB is opened
|
|
1386
1504
|
void SetDbSessionId();
|
|
1387
1505
|
|
|
1388
1506
|
Status FailIfCfHasTs(const ColumnFamilyHandle* column_family) const;
|
|
1389
|
-
Status
|
|
1390
|
-
|
|
1507
|
+
Status FailIfTsMismatchCf(ColumnFamilyHandle* column_family, const Slice& ts,
|
|
1508
|
+
bool ts_for_read) const;
|
|
1509
|
+
|
|
1510
|
+
// recovery_ctx stores the context about version edits and
|
|
1511
|
+
// LogAndApplyForRecovery persist all those edits to new Manifest after
|
|
1512
|
+
// successfully syncing new WAL.
|
|
1513
|
+
// LogAndApplyForRecovery should be called only once during recovery and it
|
|
1514
|
+
// should be called when RocksDB writes to a first new MANIFEST since this
|
|
1515
|
+
// recovery.
|
|
1516
|
+
Status LogAndApplyForRecovery(const RecoveryContext& recovery_ctx);
|
|
1391
1517
|
|
|
1392
1518
|
private:
|
|
1393
1519
|
friend class DB;
|
|
@@ -1526,7 +1652,11 @@ class DBImpl : public DB {
|
|
|
1526
1652
|
output_path_id(_output_path_id),
|
|
1527
1653
|
exclusive(_exclusive),
|
|
1528
1654
|
disallow_trivial_move(_disallow_trivial_move),
|
|
1529
|
-
canceled(_canceled) {}
|
|
1655
|
+
canceled(_canceled ? *_canceled : canceled_internal_storage) {}
|
|
1656
|
+
// When _canceled is not provided by the user, we assign the reference of
|
|
1657
|
+
// canceled_internal_storage to it to consolidate canceled and
|
|
1658
|
+
// manual_compaction_paused since DisableManualCompaction() might be
|
|
1659
|
+
// called
|
|
1530
1660
|
|
|
1531
1661
|
ColumnFamilyData* cfd;
|
|
1532
1662
|
int input_level;
|
|
@@ -1543,7 +1673,12 @@ class DBImpl : public DB {
|
|
|
1543
1673
|
InternalKey* manual_end = nullptr; // how far we are compacting
|
|
1544
1674
|
InternalKey tmp_storage; // Used to keep track of compaction progress
|
|
1545
1675
|
InternalKey tmp_storage1; // Used to keep track of compaction progress
|
|
1546
|
-
|
|
1676
|
+
|
|
1677
|
+
// When the user provides a canceled pointer in CompactRangeOptions, the
|
|
1678
|
+
// `canceled` member below is the reference of the user-provided
|
|
1679
|
+
// `canceled`, otherwise, it is the reference of canceled_internal_storage
|
|
1680
|
+
std::atomic<bool> canceled_internal_storage = false;
|
|
1681
|
+
std::atomic<bool>& canceled; // Compaction canceled pointer reference
|
|
1547
1682
|
};
|
|
1548
1683
|
struct PrepickedCompaction {
|
|
1549
1684
|
// background compaction takes ownership of `compaction`.
|
|
@@ -1645,7 +1780,8 @@ class DBImpl : public DB {
|
|
|
1645
1780
|
// corrupted_log_found is set to true if we recover from a corrupted log file.
|
|
1646
1781
|
Status RecoverLogFiles(const std::vector<uint64_t>& log_numbers,
|
|
1647
1782
|
SequenceNumber* next_sequence, bool read_only,
|
|
1648
|
-
bool* corrupted_log_found
|
|
1783
|
+
bool* corrupted_log_found,
|
|
1784
|
+
RecoveryContext* recovery_ctx);
|
|
1649
1785
|
|
|
1650
1786
|
// The following two methods are used to flush a memtable to
|
|
1651
1787
|
// storage. The first one is used at database RecoveryTime (when the
|
|
@@ -1789,12 +1925,13 @@ class DBImpl : public DB {
|
|
|
1789
1925
|
IOStatus WriteToWAL(const WriteBatch& merged_batch, log::Writer* log_writer,
|
|
1790
1926
|
uint64_t* log_used, uint64_t* log_size,
|
|
1791
1927
|
Env::IOPriority rate_limiter_priority,
|
|
1792
|
-
|
|
1928
|
+
LogFileNumberSize& log_file_number_size);
|
|
1793
1929
|
|
|
1794
1930
|
IOStatus WriteToWAL(const WriteThread::WriteGroup& write_group,
|
|
1795
1931
|
log::Writer* log_writer, uint64_t* log_used,
|
|
1796
1932
|
bool need_log_sync, bool need_log_dir_sync,
|
|
1797
|
-
SequenceNumber sequence
|
|
1933
|
+
SequenceNumber sequence,
|
|
1934
|
+
LogFileNumberSize& log_file_number_size);
|
|
1798
1935
|
|
|
1799
1936
|
IOStatus ConcurrentWriteToWAL(const WriteThread::WriteGroup& write_group,
|
|
1800
1937
|
uint64_t* log_used,
|
|
@@ -1919,10 +2056,24 @@ class DBImpl : public DB {
|
|
|
1919
2056
|
SnapshotImpl* GetSnapshotImpl(bool is_write_conflict_boundary,
|
|
1920
2057
|
bool lock = true);
|
|
1921
2058
|
|
|
2059
|
+
// If snapshot_seq != kMaxSequenceNumber, then this function can only be
|
|
2060
|
+
// called from the write thread that publishes sequence numbers to readers.
|
|
2061
|
+
// For 1) write-committed, or 2) write-prepared + one-write-queue, this will
|
|
2062
|
+
// be the write thread performing memtable writes. For write-prepared with
|
|
2063
|
+
// two write queues, this will be the write thread writing commit marker to
|
|
2064
|
+
// the WAL.
|
|
2065
|
+
// If snapshot_seq == kMaxSequenceNumber, this function is called by a caller
|
|
2066
|
+
// ensuring no writes to the database.
|
|
2067
|
+
std::pair<Status, std::shared_ptr<const SnapshotImpl>>
|
|
2068
|
+
CreateTimestampedSnapshotImpl(SequenceNumber snapshot_seq, uint64_t ts,
|
|
2069
|
+
bool lock = true);
|
|
2070
|
+
|
|
1922
2071
|
uint64_t GetMaxTotalWalSize() const;
|
|
1923
2072
|
|
|
1924
2073
|
FSDirectory* GetDataDir(ColumnFamilyData* cfd, size_t path_id) const;
|
|
1925
2074
|
|
|
2075
|
+
Status MaybeReleaseTimestampedSnapshotsAndCheck();
|
|
2076
|
+
|
|
1926
2077
|
Status CloseHelper();
|
|
1927
2078
|
|
|
1928
2079
|
void WaitForBackgroundWork();
|
|
@@ -2124,11 +2275,7 @@ class DBImpl : public DB {
|
|
|
2124
2275
|
// are protected by locking both mutex_ and log_write_mutex_, and reads must
|
|
2125
2276
|
// be under either mutex_ or log_write_mutex_.
|
|
2126
2277
|
std::deque<LogFileNumberSize> alive_log_files_;
|
|
2127
|
-
|
|
2128
|
-
// call `alive_log_files_.back()` in the write thread (WriteToWAL()) which
|
|
2129
|
-
// requires locking db mutex if log_mutex_ is not already held in
|
|
2130
|
-
// two-write-queues mode.
|
|
2131
|
-
std::deque<LogFileNumberSize>::reverse_iterator alive_log_files_tail_;
|
|
2278
|
+
|
|
2132
2279
|
// Log files that aren't fully synced, and the current log file.
|
|
2133
2280
|
// Synchronization:
|
|
2134
2281
|
// - push_back() is done from write_thread_ with locked mutex_ and
|
|
@@ -2192,6 +2339,8 @@ class DBImpl : public DB {
|
|
|
2192
2339
|
|
|
2193
2340
|
SnapshotList snapshots_;
|
|
2194
2341
|
|
|
2342
|
+
TimestampedSnapshotList timestamped_snapshots_;
|
|
2343
|
+
|
|
2195
2344
|
// For each background job, pending_outputs_ keeps the current file number at
|
|
2196
2345
|
// the time that background job started.
|
|
2197
2346
|
// FindObsoleteFiles()/PurgeObsoleteFiles() never deletes any file that has
|
|
@@ -2479,8 +2628,9 @@ inline Status DBImpl::FailIfCfHasTs(
|
|
|
2479
2628
|
return Status::OK();
|
|
2480
2629
|
}
|
|
2481
2630
|
|
|
2482
|
-
inline Status DBImpl::
|
|
2483
|
-
|
|
2631
|
+
inline Status DBImpl::FailIfTsMismatchCf(ColumnFamilyHandle* column_family,
|
|
2632
|
+
const Slice& ts,
|
|
2633
|
+
bool ts_for_read) const {
|
|
2484
2634
|
if (!column_family) {
|
|
2485
2635
|
return Status::InvalidArgument("column family handle cannot be null");
|
|
2486
2636
|
}
|
|
@@ -2500,6 +2650,19 @@ inline Status DBImpl::FailIfTsSizesMismatch(
|
|
|
2500
2650
|
<< ts_sz << " given";
|
|
2501
2651
|
return Status::InvalidArgument(oss.str());
|
|
2502
2652
|
}
|
|
2653
|
+
if (ts_for_read) {
|
|
2654
|
+
auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
|
|
2655
|
+
auto cfd = cfh->cfd();
|
|
2656
|
+
std::string current_ts_low = cfd->GetFullHistoryTsLow();
|
|
2657
|
+
if (!current_ts_low.empty() &&
|
|
2658
|
+
ucmp->CompareTimestamp(ts, current_ts_low) < 0) {
|
|
2659
|
+
std::stringstream oss;
|
|
2660
|
+
oss << "Read timestamp: " << ts.ToString(true)
|
|
2661
|
+
<< " is smaller than full_history_ts_low: "
|
|
2662
|
+
<< Slice(current_ts_low).ToString(true) << std::endl;
|
|
2663
|
+
return Status::InvalidArgument(oss.str());
|
|
2664
|
+
}
|
|
2665
|
+
}
|
|
2503
2666
|
return Status::OK();
|
|
2504
2667
|
}
|
|
2505
2668
|
|
|
@@ -952,6 +952,8 @@ Status DBImpl::IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd,
|
|
|
952
952
|
VersionEdit edit;
|
|
953
953
|
edit.SetColumnFamily(cfd->GetID());
|
|
954
954
|
edit.SetFullHistoryTsLow(ts_low);
|
|
955
|
+
TEST_SYNC_POINT_CALLBACK("DBImpl::IncreaseFullHistoryTsLowImpl:BeforeEdit",
|
|
956
|
+
&edit);
|
|
955
957
|
|
|
956
958
|
InstrumentedMutexLock l(&mutex_);
|
|
957
959
|
std::string current_ts_low = cfd->GetFullHistoryTsLow();
|
|
@@ -959,12 +961,25 @@ Status DBImpl::IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd,
|
|
|
959
961
|
assert(ucmp->timestamp_size() == ts_low.size() && !ts_low.empty());
|
|
960
962
|
if (!current_ts_low.empty() &&
|
|
961
963
|
ucmp->CompareTimestamp(ts_low, current_ts_low) < 0) {
|
|
962
|
-
return Status::InvalidArgument(
|
|
963
|
-
"Cannot decrease full_history_timestamp_low");
|
|
964
|
+
return Status::InvalidArgument("Cannot decrease full_history_ts_low");
|
|
964
965
|
}
|
|
965
966
|
|
|
966
|
-
|
|
967
|
-
|
|
967
|
+
Status s = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
|
|
968
|
+
&edit, &mutex_);
|
|
969
|
+
if (!s.ok()) {
|
|
970
|
+
return s;
|
|
971
|
+
}
|
|
972
|
+
current_ts_low = cfd->GetFullHistoryTsLow();
|
|
973
|
+
if (!current_ts_low.empty() &&
|
|
974
|
+
ucmp->CompareTimestamp(current_ts_low, ts_low) > 0) {
|
|
975
|
+
std::stringstream oss;
|
|
976
|
+
oss << "full_history_ts_low: " << Slice(current_ts_low).ToString(true)
|
|
977
|
+
<< " is set to be higher than the requested "
|
|
978
|
+
"timestamp: "
|
|
979
|
+
<< Slice(ts_low).ToString(true) << std::endl;
|
|
980
|
+
return Status::TryAgain(oss.str());
|
|
981
|
+
}
|
|
982
|
+
return Status::OK();
|
|
968
983
|
}
|
|
969
984
|
|
|
970
985
|
Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
|
|
@@ -1217,6 +1232,10 @@ Status DBImpl::CompactFiles(const CompactionOptions& compact_options,
|
|
|
1217
1232
|
|
|
1218
1233
|
// Perform CompactFiles
|
|
1219
1234
|
TEST_SYNC_POINT("TestCompactFiles::IngestExternalFile2");
|
|
1235
|
+
TEST_SYNC_POINT_CALLBACK(
|
|
1236
|
+
"TestCompactFiles:PausingManualCompaction:3",
|
|
1237
|
+
reinterpret_cast<void*>(
|
|
1238
|
+
const_cast<std::atomic<int>*>(&manual_compaction_paused_)));
|
|
1220
1239
|
{
|
|
1221
1240
|
InstrumentedMutexLock l(&mutex_);
|
|
1222
1241
|
|
|
@@ -1372,7 +1391,7 @@ Status DBImpl::CompactFilesImpl(
|
|
|
1372
1391
|
c->mutable_cf_options()->paranoid_file_checks,
|
|
1373
1392
|
c->mutable_cf_options()->report_bg_io_stats, dbname_,
|
|
1374
1393
|
&compaction_job_stats, Env::Priority::USER, io_tracer_,
|
|
1375
|
-
|
|
1394
|
+
kManualCompactionCanceledFalse_, db_id_, db_session_id_,
|
|
1376
1395
|
c->column_family_data()->GetFullHistoryTsLow(), c->trim_ts(),
|
|
1377
1396
|
&blob_callback_);
|
|
1378
1397
|
|
|
@@ -1838,8 +1857,7 @@ Status DBImpl::RunManualCompaction(
|
|
|
1838
1857
|
// and `CompactRangeOptions::canceled` might not work well together.
|
|
1839
1858
|
while (bg_bottom_compaction_scheduled_ > 0 ||
|
|
1840
1859
|
bg_compaction_scheduled_ > 0) {
|
|
1841
|
-
if (manual_compaction_paused_ > 0 ||
|
|
1842
|
-
(manual.canceled != nullptr && *manual.canceled == true)) {
|
|
1860
|
+
if (manual_compaction_paused_ > 0 || manual.canceled == true) {
|
|
1843
1861
|
// Pretend the error came from compaction so the below cleanup/error
|
|
1844
1862
|
// handling code can process it.
|
|
1845
1863
|
manual.done = true;
|
|
@@ -2376,10 +2394,18 @@ Status DBImpl::EnableAutoCompaction(
|
|
|
2376
2394
|
return s;
|
|
2377
2395
|
}
|
|
2378
2396
|
|
|
2397
|
+
// NOTE: Calling DisableManualCompaction() may overwrite the
|
|
2398
|
+
// user-provided canceled variable in CompactRangeOptions
|
|
2379
2399
|
void DBImpl::DisableManualCompaction() {
|
|
2380
2400
|
InstrumentedMutexLock l(&mutex_);
|
|
2381
2401
|
manual_compaction_paused_.fetch_add(1, std::memory_order_release);
|
|
2382
2402
|
|
|
2403
|
+
// Mark `canceled` as true when the cancellation is triggered by
|
|
2404
|
+
// manual_compaction_paused (may overwrite user-provided `canceled`)
|
|
2405
|
+
for (const auto& manual_compaction : manual_compaction_dequeue_) {
|
|
2406
|
+
manual_compaction->canceled = true;
|
|
2407
|
+
}
|
|
2408
|
+
|
|
2383
2409
|
// Wake up manual compactions waiting to start.
|
|
2384
2410
|
bg_cv_.SignalAll();
|
|
2385
2411
|
|
|
@@ -2392,6 +2418,11 @@ void DBImpl::DisableManualCompaction() {
|
|
|
2392
2418
|
}
|
|
2393
2419
|
}
|
|
2394
2420
|
|
|
2421
|
+
// NOTE: In contrast to DisableManualCompaction(), calling
|
|
2422
|
+
// EnableManualCompaction() does NOT overwrite the user-provided *canceled
|
|
2423
|
+
// variable to be false since there is NO CHANCE a canceled compaction
|
|
2424
|
+
// is uncanceled. In other words, a canceled compaction must have been
|
|
2425
|
+
// dropped out of the manual compaction queue, when we disable it.
|
|
2395
2426
|
void DBImpl::EnableManualCompaction() {
|
|
2396
2427
|
InstrumentedMutexLock l(&mutex_);
|
|
2397
2428
|
assert(manual_compaction_paused_ > 0);
|
|
@@ -3037,10 +3068,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3037
3068
|
if (shutting_down_.load(std::memory_order_acquire)) {
|
|
3038
3069
|
status = Status::ShutdownInProgress();
|
|
3039
3070
|
} else if (is_manual &&
|
|
3040
|
-
|
|
3041
|
-
status = Status::Incomplete(Status::SubCode::kManualCompactionPaused);
|
|
3042
|
-
} else if (is_manual && manual_compaction->canceled &&
|
|
3043
|
-
manual_compaction->canceled->load(std::memory_order_acquire)) {
|
|
3071
|
+
manual_compaction->canceled.load(std::memory_order_acquire)) {
|
|
3044
3072
|
status = Status::Incomplete(Status::SubCode::kManualCompactionPaused);
|
|
3045
3073
|
}
|
|
3046
3074
|
} else {
|
|
@@ -3357,6 +3385,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3357
3385
|
GetSnapshotContext(job_context, &snapshot_seqs,
|
|
3358
3386
|
&earliest_write_conflict_snapshot, &snapshot_checker);
|
|
3359
3387
|
assert(is_snapshot_supported_ || snapshots_.empty());
|
|
3388
|
+
|
|
3360
3389
|
CompactionJob compaction_job(
|
|
3361
3390
|
job_context->job_id, c.get(), immutable_db_options_,
|
|
3362
3391
|
mutable_db_options_, file_options_for_compaction_, versions_.get(),
|
|
@@ -3368,9 +3397,9 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3368
3397
|
c->mutable_cf_options()->paranoid_file_checks,
|
|
3369
3398
|
c->mutable_cf_options()->report_bg_io_stats, dbname_,
|
|
3370
3399
|
&compaction_job_stats, thread_pri, io_tracer_,
|
|
3371
|
-
is_manual ?
|
|
3372
|
-
|
|
3373
|
-
db_session_id_, c->column_family_data()->GetFullHistoryTsLow(),
|
|
3400
|
+
is_manual ? manual_compaction->canceled
|
|
3401
|
+
: kManualCompactionCanceledFalse_,
|
|
3402
|
+
db_id_, db_session_id_, c->column_family_data()->GetFullHistoryTsLow(),
|
|
3374
3403
|
c->trim_ts(), &blob_callback_);
|
|
3375
3404
|
compaction_job.Prepare();
|
|
3376
3405
|
|
|
@@ -166,8 +166,8 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
|
|
|
166
166
|
job_context->log_number = MinLogNumberToKeep();
|
|
167
167
|
job_context->prev_log_number = versions_->prev_log_number();
|
|
168
168
|
|
|
169
|
-
versions_->AddLiveFiles(&job_context->sst_live, &job_context->blob_live);
|
|
170
169
|
if (doing_the_full_scan) {
|
|
170
|
+
versions_->AddLiveFiles(&job_context->sst_live, &job_context->blob_live);
|
|
171
171
|
InfoLogPrefix info_log_prefix(!immutable_db_options_.db_log_dir.empty(),
|
|
172
172
|
dbname_);
|
|
173
173
|
std::set<std::string> paths;
|
|
@@ -242,6 +242,14 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
|
|
|
242
242
|
log_file, immutable_db_options_.db_log_dir);
|
|
243
243
|
}
|
|
244
244
|
}
|
|
245
|
+
} else {
|
|
246
|
+
// Instead of filling job_context->sst_live and job_context->blob_live,
|
|
247
|
+
// directly remove files that show up in any Version. This is because
|
|
248
|
+
// candidate files tend to be a small percentage of all files, so it is
|
|
249
|
+
// usually cheaper to check them against every version, compared to
|
|
250
|
+
// building a map for all files.
|
|
251
|
+
versions_->RemoveLiveFiles(job_context->sst_delete_files,
|
|
252
|
+
job_context->blob_delete_files);
|
|
245
253
|
}
|
|
246
254
|
|
|
247
255
|
// logs_ is empty when called during recovery, in which case there can't yet
|
|
@@ -395,8 +403,10 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) {
|
|
|
395
403
|
state.manifest_delete_files.size());
|
|
396
404
|
// We may ignore the dbname when generating the file names.
|
|
397
405
|
for (auto& file : state.sst_delete_files) {
|
|
398
|
-
|
|
399
|
-
|
|
406
|
+
if (!file.only_delete_metadata) {
|
|
407
|
+
candidate_files.emplace_back(
|
|
408
|
+
MakeTableFileName(file.metadata->fd.GetNumber()), file.path);
|
|
409
|
+
}
|
|
400
410
|
if (file.metadata->table_reader_handle) {
|
|
401
411
|
table_cache_->Release(file.metadata->table_reader_handle);
|
|
402
412
|
}
|
|
@@ -863,7 +873,7 @@ uint64_t PrecomputeMinLogNumberToKeep2PC(
|
|
|
863
873
|
return min_log_number_to_keep;
|
|
864
874
|
}
|
|
865
875
|
|
|
866
|
-
Status DBImpl::SetDBId(bool read_only) {
|
|
876
|
+
Status DBImpl::SetDBId(bool read_only, RecoveryContext* recovery_ctx) {
|
|
867
877
|
Status s;
|
|
868
878
|
// Happens when immutable_db_options_.write_dbid_to_manifest is set to true
|
|
869
879
|
// the very first time.
|
|
@@ -890,14 +900,14 @@ Status DBImpl::SetDBId(bool read_only) {
|
|
|
890
900
|
}
|
|
891
901
|
s = GetDbIdentityFromIdentityFile(&db_id_);
|
|
892
902
|
if (immutable_db_options_.write_dbid_to_manifest && s.ok()) {
|
|
903
|
+
assert(!read_only);
|
|
904
|
+
assert(recovery_ctx != nullptr);
|
|
905
|
+
assert(versions_->GetColumnFamilySet() != nullptr);
|
|
893
906
|
VersionEdit edit;
|
|
894
907
|
edit.SetDBId(db_id_);
|
|
895
|
-
Options options;
|
|
896
|
-
MutableCFOptions mutable_cf_options(options);
|
|
897
908
|
versions_->db_id_ = db_id_;
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
/* new_descriptor_log */ false);
|
|
909
|
+
recovery_ctx->UpdateVersionEdits(
|
|
910
|
+
versions_->GetColumnFamilySet()->GetDefault(), edit);
|
|
901
911
|
}
|
|
902
912
|
} else if (!read_only) {
|
|
903
913
|
s = SetIdentityFile(env_, dbname_, db_id_);
|
|
@@ -905,7 +915,7 @@ Status DBImpl::SetDBId(bool read_only) {
|
|
|
905
915
|
return s;
|
|
906
916
|
}
|
|
907
917
|
|
|
908
|
-
Status DBImpl::DeleteUnreferencedSstFiles() {
|
|
918
|
+
Status DBImpl::DeleteUnreferencedSstFiles(RecoveryContext* recovery_ctx) {
|
|
909
919
|
mutex_.AssertHeld();
|
|
910
920
|
std::vector<std::string> paths;
|
|
911
921
|
paths.push_back(NormalizePath(dbname_ + std::string(1, kFilePathSeparator)));
|
|
@@ -925,7 +935,6 @@ Status DBImpl::DeleteUnreferencedSstFiles() {
|
|
|
925
935
|
|
|
926
936
|
uint64_t next_file_number = versions_->current_next_file_number();
|
|
927
937
|
uint64_t largest_file_number = next_file_number;
|
|
928
|
-
std::set<std::string> files_to_delete;
|
|
929
938
|
Status s;
|
|
930
939
|
for (const auto& path : paths) {
|
|
931
940
|
std::vector<std::string> files;
|
|
@@ -943,8 +952,9 @@ Status DBImpl::DeleteUnreferencedSstFiles() {
|
|
|
943
952
|
const std::string normalized_fpath = path + fname;
|
|
944
953
|
largest_file_number = std::max(largest_file_number, number);
|
|
945
954
|
if (type == kTableFile && number >= next_file_number &&
|
|
946
|
-
|
|
947
|
-
|
|
955
|
+
recovery_ctx->files_to_delete_.find(normalized_fpath) ==
|
|
956
|
+
recovery_ctx->files_to_delete_.end()) {
|
|
957
|
+
recovery_ctx->files_to_delete_.emplace(normalized_fpath);
|
|
948
958
|
}
|
|
949
959
|
}
|
|
950
960
|
}
|
|
@@ -961,21 +971,7 @@ Status DBImpl::DeleteUnreferencedSstFiles() {
|
|
|
961
971
|
assert(versions_->GetColumnFamilySet());
|
|
962
972
|
ColumnFamilyData* default_cfd = versions_->GetColumnFamilySet()->GetDefault();
|
|
963
973
|
assert(default_cfd);
|
|
964
|
-
|
|
965
|
-
default_cfd, *default_cfd->GetLatestMutableCFOptions(), &edit, &mutex_,
|
|
966
|
-
directories_.GetDbDir(), /*new_descriptor_log*/ false);
|
|
967
|
-
if (!s.ok()) {
|
|
968
|
-
return s;
|
|
969
|
-
}
|
|
970
|
-
|
|
971
|
-
mutex_.Unlock();
|
|
972
|
-
for (const auto& fname : files_to_delete) {
|
|
973
|
-
s = env_->DeleteFile(fname);
|
|
974
|
-
if (!s.ok()) {
|
|
975
|
-
break;
|
|
976
|
-
}
|
|
977
|
-
}
|
|
978
|
-
mutex_.Lock();
|
|
974
|
+
recovery_ctx->UpdateVersionEdits(default_cfd, edit);
|
|
979
975
|
return s;
|
|
980
976
|
}
|
|
981
977
|
|