@nxtedition/rocksdb 7.0.3 → 7.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +320 -324
- package/chained-batch.js +6 -1
- package/deps/rocksdb/rocksdb/CMakeLists.txt +8 -3
- package/deps/rocksdb/rocksdb/Makefile +10 -4
- package/deps/rocksdb/rocksdb/TARGETS +6 -4
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +9 -0
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +14 -0
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +8 -8
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +272 -174
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +201 -57
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +19 -19
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +2 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +170 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +95 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +298 -0
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +172 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +8 -3
- package/deps/rocksdb/rocksdb/db/column_family.h +6 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +10 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +22 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +38 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +17 -5
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +4 -7
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +74 -71
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +70 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +13 -12
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +36 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +11 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +139 -91
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +48 -14
- package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +90 -55
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +9 -4
- package/deps/rocksdb/rocksdb/db/db_test.cc +3 -1
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +12 -7
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +35 -0
- package/deps/rocksdb/rocksdb/db/dbformat.cc +3 -1
- package/deps/rocksdb/rocksdb/db/dbformat.h +5 -3
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/memtable.cc +1 -0
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +4 -2
- package/deps/rocksdb/rocksdb/db/repair.cc +1 -1
- package/deps/rocksdb/rocksdb/db/version_builder.cc +43 -1
- package/deps/rocksdb/rocksdb/db/version_edit.cc +13 -5
- package/deps/rocksdb/rocksdb/db/version_edit.h +22 -1
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +4 -5
- package/deps/rocksdb/rocksdb/db/version_set.cc +109 -41
- package/deps/rocksdb/rocksdb/db/version_set.h +36 -3
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -4
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +10 -10
- package/deps/rocksdb/rocksdb/db/version_util.h +1 -1
- package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/write_batch.cc +34 -10
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +2 -0
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +4 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +7 -5
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +5 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +0 -7
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +2 -0
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +24 -3
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +8 -0
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +10 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +4 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +9 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +0 -3
- package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +8 -6
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +3 -1
- package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -2
- package/deps/rocksdb/rocksdb/options/options_test.cc +1 -11
- package/deps/rocksdb/rocksdb/port/port_posix.h +7 -0
- package/deps/rocksdb/rocksdb/port/win/port_win.h +11 -3
- package/deps/rocksdb/rocksdb/src.mk +6 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +4 -33
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +3 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +38 -118
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +6 -8
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +10 -13
- package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +4 -9
- package/deps/rocksdb/rocksdb/table/block_based/block_type.h +0 -1
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +10 -28
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +2 -3
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +0 -91
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +2 -30
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +6 -27
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +11 -13
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +28 -40
- package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +0 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +22 -43
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +11 -22
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +24 -25
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +0 -1
- package/deps/rocksdb/rocksdb/table/get_context.h +0 -1
- package/deps/rocksdb/rocksdb/table/table_test.cc +3 -18
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +3 -16
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +3 -3
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +1 -1
- package/deps/rocksdb/rocksdb/util/bloom_test.cc +0 -201
- package/deps/rocksdb/rocksdb/util/distributed_mutex.h +48 -0
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +5 -11
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +7 -21
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +45 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +21 -14
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +10 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +3 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +9 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +3 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +3 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +5 -4
- package/deps/rocksdb/rocksdb.gyp +1 -1
- package/index.js +36 -14
- package/package-lock.json +2 -2
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +0 -358
- package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.h +0 -127
- package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc +0 -219
|
@@ -15,7 +15,6 @@ enum class WriteBatchOpType {
|
|
|
15
15
|
kSingleDelete,
|
|
16
16
|
kDeleteRange,
|
|
17
17
|
kMerge,
|
|
18
|
-
kBlobIndex,
|
|
19
18
|
kNum,
|
|
20
19
|
};
|
|
21
20
|
|
|
@@ -25,11 +24,28 @@ WriteBatchOpType operator+(WriteBatchOpType lhs, const int rhs) {
|
|
|
25
24
|
return static_cast<WriteBatchOpType>(static_cast<T>(lhs) + rhs);
|
|
26
25
|
}
|
|
27
26
|
|
|
27
|
+
enum class WriteMode {
|
|
28
|
+
// `Write()` a `WriteBatch` constructed with `protection_bytes_per_key > 0`.
|
|
29
|
+
kWriteProtectedBatch = 0,
|
|
30
|
+
// `Write()` a `WriteBatch` constructed with `protection_bytes_per_key == 0`.
|
|
31
|
+
// Protection is enabled via `WriteOptions::protection_bytes_per_key > 0`.
|
|
32
|
+
kWriteUnprotectedBatch,
|
|
33
|
+
// TODO(ajkr): add a mode that uses `Write()` wrappers, e.g., `Put()`.
|
|
34
|
+
kNum,
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
// Integer addition is needed for `::testing::Range()` to take the enum type.
|
|
38
|
+
WriteMode operator+(WriteMode lhs, const int rhs) {
|
|
39
|
+
using T = std::underlying_type<WriteMode>::type;
|
|
40
|
+
return static_cast<WriteMode>(static_cast<T>(lhs) + rhs);
|
|
41
|
+
}
|
|
42
|
+
|
|
28
43
|
std::pair<WriteBatch, Status> GetWriteBatch(ColumnFamilyHandle* cf_handle,
|
|
44
|
+
size_t protection_bytes_per_key,
|
|
29
45
|
WriteBatchOpType op_type) {
|
|
30
46
|
Status s;
|
|
31
47
|
WriteBatch wb(0 /* reserved_bytes */, 0 /* max_bytes */,
|
|
32
|
-
|
|
48
|
+
protection_bytes_per_key, 0 /* default_cf_ts_sz */);
|
|
33
49
|
switch (op_type) {
|
|
34
50
|
case WriteBatchOpType::kPut:
|
|
35
51
|
s = wb.Put(cf_handle, "key", "val");
|
|
@@ -46,36 +62,44 @@ std::pair<WriteBatch, Status> GetWriteBatch(ColumnFamilyHandle* cf_handle,
|
|
|
46
62
|
case WriteBatchOpType::kMerge:
|
|
47
63
|
s = wb.Merge(cf_handle, "key", "val");
|
|
48
64
|
break;
|
|
49
|
-
case WriteBatchOpType::kBlobIndex: {
|
|
50
|
-
// TODO(ajkr): use public API once available.
|
|
51
|
-
uint32_t cf_id;
|
|
52
|
-
if (cf_handle == nullptr) {
|
|
53
|
-
cf_id = 0;
|
|
54
|
-
} else {
|
|
55
|
-
cf_id = cf_handle->GetID();
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
std::string blob_index;
|
|
59
|
-
BlobIndex::EncodeInlinedTTL(&blob_index, /* expiration */ 9876543210,
|
|
60
|
-
"val");
|
|
61
|
-
|
|
62
|
-
s = WriteBatchInternal::PutBlobIndex(&wb, cf_id, "key", blob_index);
|
|
63
|
-
break;
|
|
64
|
-
}
|
|
65
65
|
case WriteBatchOpType::kNum:
|
|
66
66
|
assert(false);
|
|
67
67
|
}
|
|
68
68
|
return {std::move(wb), std::move(s)};
|
|
69
69
|
}
|
|
70
70
|
|
|
71
|
-
class DbKvChecksumTest
|
|
72
|
-
|
|
73
|
-
|
|
71
|
+
class DbKvChecksumTest : public DBTestBase,
|
|
72
|
+
public ::testing::WithParamInterface<
|
|
73
|
+
std::tuple<WriteBatchOpType, char, WriteMode>> {
|
|
74
74
|
public:
|
|
75
75
|
DbKvChecksumTest()
|
|
76
76
|
: DBTestBase("db_kv_checksum_test", /*env_do_fsync=*/false) {
|
|
77
77
|
op_type_ = std::get<0>(GetParam());
|
|
78
78
|
corrupt_byte_addend_ = std::get<1>(GetParam());
|
|
79
|
+
write_mode_ = std::get<2>(GetParam());
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
Status ExecuteWrite(ColumnFamilyHandle* cf_handle) {
|
|
83
|
+
switch (write_mode_) {
|
|
84
|
+
case WriteMode::kWriteProtectedBatch: {
|
|
85
|
+
auto batch_and_status = GetWriteBatch(
|
|
86
|
+
cf_handle, 8 /* protection_bytes_per_key */, op_type_);
|
|
87
|
+
assert(batch_and_status.second.ok());
|
|
88
|
+
return db_->Write(WriteOptions(), &batch_and_status.first);
|
|
89
|
+
}
|
|
90
|
+
case WriteMode::kWriteUnprotectedBatch: {
|
|
91
|
+
auto batch_and_status = GetWriteBatch(
|
|
92
|
+
cf_handle, 0 /* protection_bytes_per_key */, op_type_);
|
|
93
|
+
assert(batch_and_status.second.ok());
|
|
94
|
+
WriteOptions write_opts;
|
|
95
|
+
write_opts.protection_bytes_per_key = 8;
|
|
96
|
+
return db_->Write(write_opts, &batch_and_status.first);
|
|
97
|
+
}
|
|
98
|
+
case WriteMode::kNum:
|
|
99
|
+
assert(false);
|
|
100
|
+
}
|
|
101
|
+
return Status::NotSupported("WriteMode " +
|
|
102
|
+
std::to_string(static_cast<int>(write_mode_)));
|
|
79
103
|
}
|
|
80
104
|
|
|
81
105
|
void CorruptNextByteCallBack(void* arg) {
|
|
@@ -96,6 +120,7 @@ class DbKvChecksumTest
|
|
|
96
120
|
protected:
|
|
97
121
|
WriteBatchOpType op_type_;
|
|
98
122
|
char corrupt_byte_addend_;
|
|
123
|
+
WriteMode write_mode_;
|
|
99
124
|
size_t corrupt_byte_offset_ = 0;
|
|
100
125
|
size_t entry_len_ = std::numeric_limits<size_t>::max();
|
|
101
126
|
};
|
|
@@ -114,9 +139,6 @@ std::string GetOpTypeString(const WriteBatchOpType& op_type) {
|
|
|
114
139
|
case WriteBatchOpType::kMerge:
|
|
115
140
|
return "Merge";
|
|
116
141
|
break;
|
|
117
|
-
case WriteBatchOpType::kBlobIndex:
|
|
118
|
-
return "BlobIndex";
|
|
119
|
-
break;
|
|
120
142
|
case WriteBatchOpType::kNum:
|
|
121
143
|
assert(false);
|
|
122
144
|
}
|
|
@@ -128,15 +150,31 @@ INSTANTIATE_TEST_CASE_P(
|
|
|
128
150
|
DbKvChecksumTest, DbKvChecksumTest,
|
|
129
151
|
::testing::Combine(::testing::Range(static_cast<WriteBatchOpType>(0),
|
|
130
152
|
WriteBatchOpType::kNum),
|
|
131
|
-
::testing::Values(2, 103, 251)
|
|
132
|
-
|
|
153
|
+
::testing::Values(2, 103, 251),
|
|
154
|
+
::testing::Range(static_cast<WriteMode>(0),
|
|
155
|
+
WriteMode::kNum)),
|
|
156
|
+
[](const testing::TestParamInfo<
|
|
157
|
+
std::tuple<WriteBatchOpType, char, WriteMode>>& args) {
|
|
133
158
|
std::ostringstream oss;
|
|
134
159
|
oss << GetOpTypeString(std::get<0>(args.param)) << "Add"
|
|
135
160
|
<< static_cast<int>(
|
|
136
161
|
static_cast<unsigned char>(std::get<1>(args.param)));
|
|
162
|
+
switch (std::get<2>(args.param)) {
|
|
163
|
+
case WriteMode::kWriteProtectedBatch:
|
|
164
|
+
oss << "WriteProtectedBatch";
|
|
165
|
+
break;
|
|
166
|
+
case WriteMode::kWriteUnprotectedBatch:
|
|
167
|
+
oss << "WriteUnprotectedBatch";
|
|
168
|
+
break;
|
|
169
|
+
case WriteMode::kNum:
|
|
170
|
+
assert(false);
|
|
171
|
+
}
|
|
137
172
|
return oss.str();
|
|
138
173
|
});
|
|
139
174
|
|
|
175
|
+
// TODO(ajkr): add a test that corrupts the `WriteBatch` contents. Such
|
|
176
|
+
// corruptions should only be detectable in `WriteMode::kWriteProtectedBatch`.
|
|
177
|
+
|
|
140
178
|
TEST_P(DbKvChecksumTest, MemTableAddCorrupted) {
|
|
141
179
|
// This test repeatedly attempts to write `WriteBatch`es containing a single
|
|
142
180
|
// entry of type `op_type_`. Each attempt has one byte corrupted in its
|
|
@@ -158,10 +196,7 @@ TEST_P(DbKvChecksumTest, MemTableAddCorrupted) {
|
|
|
158
196
|
Reopen(options);
|
|
159
197
|
|
|
160
198
|
SyncPoint::GetInstance()->EnableProcessing();
|
|
161
|
-
|
|
162
|
-
ASSERT_OK(batch_and_status.second);
|
|
163
|
-
ASSERT_TRUE(
|
|
164
|
-
db_->Write(WriteOptions(), &batch_and_status.first).IsCorruption());
|
|
199
|
+
ASSERT_TRUE(ExecuteWrite(nullptr /* cf_handle */).IsCorruption());
|
|
165
200
|
SyncPoint::GetInstance()->DisableProcessing();
|
|
166
201
|
|
|
167
202
|
// In case the above callback is not invoked, this test will run
|
|
@@ -194,10 +229,7 @@ TEST_P(DbKvChecksumTest, MemTableAddWithColumnFamilyCorrupted) {
|
|
|
194
229
|
ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options);
|
|
195
230
|
|
|
196
231
|
SyncPoint::GetInstance()->EnableProcessing();
|
|
197
|
-
|
|
198
|
-
ASSERT_OK(batch_and_status.second);
|
|
199
|
-
ASSERT_TRUE(
|
|
200
|
-
db_->Write(WriteOptions(), &batch_and_status.first).IsCorruption());
|
|
232
|
+
ASSERT_TRUE(ExecuteWrite(handles_[1]).IsCorruption());
|
|
201
233
|
SyncPoint::GetInstance()->DisableProcessing();
|
|
202
234
|
|
|
203
235
|
// In case the above callback is not invoked, this test will run
|
|
@@ -209,7 +241,8 @@ TEST_P(DbKvChecksumTest, MemTableAddWithColumnFamilyCorrupted) {
|
|
|
209
241
|
|
|
210
242
|
TEST_P(DbKvChecksumTest, NoCorruptionCase) {
|
|
211
243
|
// If this test fails, we may have found a piece of malfunctioning hardware
|
|
212
|
-
auto batch_and_status =
|
|
244
|
+
auto batch_and_status =
|
|
245
|
+
GetWriteBatch(nullptr, 8 /* protection_bytes_per_key */, op_type_);
|
|
213
246
|
ASSERT_OK(batch_and_status.second);
|
|
214
247
|
ASSERT_OK(batch_and_status.first.VerifyChecksum());
|
|
215
248
|
}
|
|
@@ -238,10 +271,7 @@ TEST_P(DbKvChecksumTest, WriteToWALCorrupted) {
|
|
|
238
271
|
auto log_size_pre_write = dbfull()->TEST_total_log_size();
|
|
239
272
|
|
|
240
273
|
SyncPoint::GetInstance()->EnableProcessing();
|
|
241
|
-
|
|
242
|
-
ASSERT_OK(batch_and_status.second);
|
|
243
|
-
ASSERT_TRUE(
|
|
244
|
-
db_->Write(WriteOptions(), &batch_and_status.first).IsCorruption());
|
|
274
|
+
ASSERT_TRUE(ExecuteWrite(nullptr /* cf_handle */).IsCorruption());
|
|
245
275
|
// Confirm that nothing was written to WAL
|
|
246
276
|
ASSERT_EQ(log_size_pre_write, dbfull()->TEST_total_log_size());
|
|
247
277
|
ASSERT_TRUE(dbfull()->TEST_GetBGError().IsCorruption());
|
|
@@ -279,10 +309,7 @@ TEST_P(DbKvChecksumTest, WriteToWALWithColumnFamilyCorrupted) {
|
|
|
279
309
|
auto log_size_pre_write = dbfull()->TEST_total_log_size();
|
|
280
310
|
|
|
281
311
|
SyncPoint::GetInstance()->EnableProcessing();
|
|
282
|
-
|
|
283
|
-
ASSERT_OK(batch_and_status.second);
|
|
284
|
-
ASSERT_TRUE(
|
|
285
|
-
db_->Write(WriteOptions(), &batch_and_status.first).IsCorruption());
|
|
312
|
+
ASSERT_TRUE(ExecuteWrite(nullptr /* cf_handle */).IsCorruption());
|
|
286
313
|
// Confirm that nothing was written to WAL
|
|
287
314
|
ASSERT_EQ(log_size_pre_write, dbfull()->TEST_total_log_size());
|
|
288
315
|
ASSERT_TRUE(dbfull()->TEST_GetBGError().IsCorruption());
|
|
@@ -322,9 +349,11 @@ void CorruptWriteBatch(Slice* content, size_t offset,
|
|
|
322
349
|
|
|
323
350
|
TEST_P(DbKvChecksumTestMergedBatch, NoCorruptionCase) {
|
|
324
351
|
// Verify write batch checksum after write batch append
|
|
325
|
-
auto batch1 = GetWriteBatch(nullptr /* cf_handle */,
|
|
352
|
+
auto batch1 = GetWriteBatch(nullptr /* cf_handle */,
|
|
353
|
+
8 /* protection_bytes_per_key */, op_type1_);
|
|
326
354
|
ASSERT_OK(batch1.second);
|
|
327
|
-
auto batch2 = GetWriteBatch(nullptr /* cf_handle */,
|
|
355
|
+
auto batch2 = GetWriteBatch(nullptr /* cf_handle */,
|
|
356
|
+
8 /* protection_bytes_per_key */, op_type2_);
|
|
328
357
|
ASSERT_OK(batch2.second);
|
|
329
358
|
ASSERT_OK(WriteBatchInternal::Append(&batch1.first, &batch2.first));
|
|
330
359
|
ASSERT_OK(batch1.first.VerifyChecksum());
|
|
@@ -345,11 +374,11 @@ TEST_P(DbKvChecksumTestMergedBatch, WriteToWALCorrupted) {
|
|
|
345
374
|
options.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
346
375
|
}
|
|
347
376
|
|
|
348
|
-
auto leader_batch_and_status =
|
|
349
|
-
|
|
377
|
+
auto leader_batch_and_status = GetWriteBatch(
|
|
378
|
+
nullptr /* cf_handle */, 8 /* protection_bytes_per_key */, op_type1_);
|
|
350
379
|
ASSERT_OK(leader_batch_and_status.second);
|
|
351
|
-
auto follower_batch_and_status =
|
|
352
|
-
|
|
380
|
+
auto follower_batch_and_status = GetWriteBatch(
|
|
381
|
+
nullptr /* cf_handle */, 8 /* protection_bytes_per_key */, op_type2_);
|
|
353
382
|
size_t leader_batch_size = leader_batch_and_status.first.GetDataSize();
|
|
354
383
|
size_t total_bytes =
|
|
355
384
|
leader_batch_size + follower_batch_and_status.first.GetDataSize();
|
|
@@ -390,7 +419,8 @@ TEST_P(DbKvChecksumTestMergedBatch, WriteToWALCorrupted) {
|
|
|
390
419
|
// follower
|
|
391
420
|
follower_thread = port::Thread([&]() {
|
|
392
421
|
follower_batch_and_status =
|
|
393
|
-
GetWriteBatch(nullptr /* cf_handle */,
|
|
422
|
+
GetWriteBatch(nullptr /* cf_handle */,
|
|
423
|
+
8 /* protection_bytes_per_key */, op_type2_);
|
|
394
424
|
ASSERT_OK(follower_batch_and_status.second);
|
|
395
425
|
ASSERT_TRUE(
|
|
396
426
|
db_->Write(WriteOptions(), &follower_batch_and_status.first)
|
|
@@ -413,7 +443,8 @@ TEST_P(DbKvChecksumTestMergedBatch, WriteToWALCorrupted) {
|
|
|
413
443
|
Reopen(options);
|
|
414
444
|
SyncPoint::GetInstance()->EnableProcessing();
|
|
415
445
|
auto log_size_pre_write = dbfull()->TEST_total_log_size();
|
|
416
|
-
leader_batch_and_status = GetWriteBatch(
|
|
446
|
+
leader_batch_and_status = GetWriteBatch(
|
|
447
|
+
nullptr /* cf_handle */, 8 /* protection_bytes_per_key */, op_type1_);
|
|
417
448
|
ASSERT_OK(leader_batch_and_status.second);
|
|
418
449
|
ASSERT_TRUE(db_->Write(WriteOptions(), &leader_batch_and_status.first)
|
|
419
450
|
.IsCorruption());
|
|
@@ -452,9 +483,11 @@ TEST_P(DbKvChecksumTestMergedBatch, WriteToWALWithColumnFamilyCorrupted) {
|
|
|
452
483
|
}
|
|
453
484
|
CreateAndReopenWithCF({"ramen"}, options);
|
|
454
485
|
|
|
455
|
-
auto leader_batch_and_status =
|
|
486
|
+
auto leader_batch_and_status =
|
|
487
|
+
GetWriteBatch(handles_[1], 8 /* protection_bytes_per_key */, op_type1_);
|
|
456
488
|
ASSERT_OK(leader_batch_and_status.second);
|
|
457
|
-
auto follower_batch_and_status =
|
|
489
|
+
auto follower_batch_and_status =
|
|
490
|
+
GetWriteBatch(handles_[1], 8 /* protection_bytes_per_key */, op_type2_);
|
|
458
491
|
size_t leader_batch_size = leader_batch_and_status.first.GetDataSize();
|
|
459
492
|
size_t total_bytes =
|
|
460
493
|
leader_batch_size + follower_batch_and_status.first.GetDataSize();
|
|
@@ -494,7 +527,8 @@ TEST_P(DbKvChecksumTestMergedBatch, WriteToWALWithColumnFamilyCorrupted) {
|
|
|
494
527
|
// Start the other writer thread which will join the write group as
|
|
495
528
|
// follower
|
|
496
529
|
follower_thread = port::Thread([&]() {
|
|
497
|
-
follower_batch_and_status = GetWriteBatch(
|
|
530
|
+
follower_batch_and_status = GetWriteBatch(
|
|
531
|
+
handles_[1], 8 /* protection_bytes_per_key */, op_type2_);
|
|
498
532
|
ASSERT_OK(follower_batch_and_status.second);
|
|
499
533
|
ASSERT_TRUE(
|
|
500
534
|
db_->Write(WriteOptions(), &follower_batch_and_status.first)
|
|
@@ -518,7 +552,8 @@ TEST_P(DbKvChecksumTestMergedBatch, WriteToWALWithColumnFamilyCorrupted) {
|
|
|
518
552
|
ReopenWithColumnFamilies({kDefaultColumnFamilyName, "ramen"}, options);
|
|
519
553
|
SyncPoint::GetInstance()->EnableProcessing();
|
|
520
554
|
auto log_size_pre_write = dbfull()->TEST_total_log_size();
|
|
521
|
-
leader_batch_and_status =
|
|
555
|
+
leader_batch_and_status =
|
|
556
|
+
GetWriteBatch(handles_[1], 8 /* protection_bytes_per_key */, op_type1_);
|
|
522
557
|
ASSERT_OK(leader_batch_and_status.second);
|
|
523
558
|
ASSERT_TRUE(db_->Write(WriteOptions(), &leader_batch_and_status.first)
|
|
524
559
|
.IsCorruption());
|
|
@@ -139,8 +139,6 @@ TEST_P(DBRateLimiterOnReadTest, Get) {
|
|
|
139
139
|
}
|
|
140
140
|
|
|
141
141
|
TEST_P(DBRateLimiterOnReadTest, NewMultiGet) {
|
|
142
|
-
// The new void-returning `MultiGet()` APIs use `MultiRead()`, which does not
|
|
143
|
-
// yet support rate limiting.
|
|
144
142
|
if (use_direct_io_ && !IsDirectIOSupported()) {
|
|
145
143
|
return;
|
|
146
144
|
}
|
|
@@ -149,6 +147,7 @@ TEST_P(DBRateLimiterOnReadTest, NewMultiGet) {
|
|
|
149
147
|
ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
|
|
150
148
|
|
|
151
149
|
const int kNumKeys = kNumFiles * kNumKeysPerFile;
|
|
150
|
+
int64_t expected = 0;
|
|
152
151
|
{
|
|
153
152
|
std::vector<std::string> key_bufs;
|
|
154
153
|
key_bufs.reserve(kNumKeys);
|
|
@@ -160,13 +159,19 @@ TEST_P(DBRateLimiterOnReadTest, NewMultiGet) {
|
|
|
160
159
|
}
|
|
161
160
|
std::vector<Status> statuses(kNumKeys);
|
|
162
161
|
std::vector<PinnableSlice> values(kNumKeys);
|
|
162
|
+
const int64_t prev_total_rl_req = options_.rate_limiter->GetTotalRequests();
|
|
163
163
|
db_->MultiGet(GetReadOptions(), dbfull()->DefaultColumnFamily(), kNumKeys,
|
|
164
164
|
keys.data(), values.data(), statuses.data());
|
|
165
|
+
const int64_t cur_total_rl_req = options_.rate_limiter->GetTotalRequests();
|
|
165
166
|
for (int i = 0; i < kNumKeys; ++i) {
|
|
166
|
-
ASSERT_TRUE(statuses[i].
|
|
167
|
+
ASSERT_TRUE(statuses[i].ok());
|
|
167
168
|
}
|
|
169
|
+
ASSERT_GT(cur_total_rl_req, prev_total_rl_req);
|
|
170
|
+
ASSERT_EQ(cur_total_rl_req - prev_total_rl_req,
|
|
171
|
+
options_.rate_limiter->GetTotalRequests(Env::IO_USER));
|
|
168
172
|
}
|
|
169
|
-
|
|
173
|
+
expected += kNumKeys;
|
|
174
|
+
ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
|
|
170
175
|
}
|
|
171
176
|
|
|
172
177
|
TEST_P(DBRateLimiterOnReadTest, OldMultiGet) {
|
|
@@ -4271,7 +4271,9 @@ TEST_F(DBTest, ConcurrentFlushWAL) {
|
|
|
4271
4271
|
threads.emplace_back([&] {
|
|
4272
4272
|
for (size_t i = cnt; i < 2 * cnt; i++) {
|
|
4273
4273
|
auto istr = std::to_string(i);
|
|
4274
|
-
WriteBatch batch
|
|
4274
|
+
WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */,
|
|
4275
|
+
wopt.protection_bytes_per_key,
|
|
4276
|
+
0 /* default_cf_ts_sz */);
|
|
4275
4277
|
ASSERT_OK(batch.Put("a" + istr, "b" + istr));
|
|
4276
4278
|
ASSERT_OK(
|
|
4277
4279
|
dbfull()->WriteImpl(wopt, &batch, nullptr, nullptr, 0, true));
|
|
@@ -1261,11 +1261,11 @@ class RecoveryTestHelper {
|
|
|
1261
1261
|
std::unique_ptr<WalManager> wal_manager;
|
|
1262
1262
|
WriteController write_controller;
|
|
1263
1263
|
|
|
1264
|
-
versions.reset(new VersionSet(
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1264
|
+
versions.reset(new VersionSet(
|
|
1265
|
+
test->dbname_, &db_options, file_options, table_cache.get(),
|
|
1266
|
+
&write_buffer_manager, &write_controller,
|
|
1267
|
+
/*block_cache_tracer=*/nullptr,
|
|
1268
|
+
/*io_tracer=*/nullptr, /*db_id*/ "", /*db_session_id*/ ""));
|
|
1269
1269
|
|
|
1270
1270
|
wal_manager.reset(
|
|
1271
1271
|
new WalManager(db_options, file_options, /*io_tracer=*/nullptr));
|
|
@@ -1497,6 +1497,8 @@ TEST_F(DBWALTest, RaceInstallFlushResultsWithWalObsoletion) {
|
|
|
1497
1497
|
// The following make sure there are two bg flush threads.
|
|
1498
1498
|
options.max_background_jobs = 8;
|
|
1499
1499
|
|
|
1500
|
+
DestroyAndReopen(options);
|
|
1501
|
+
|
|
1500
1502
|
const std::string cf1_name("cf1");
|
|
1501
1503
|
CreateAndReopenWithCF({cf1_name}, options);
|
|
1502
1504
|
assert(handles_.size() == 2);
|
|
@@ -1512,10 +1514,13 @@ TEST_F(DBWALTest, RaceInstallFlushResultsWithWalObsoletion) {
|
|
|
1512
1514
|
ASSERT_OK(db_->Put(WriteOptions(), handles_[1], "foo", "value"));
|
|
1513
1515
|
ASSERT_OK(db_->Put(WriteOptions(), "foo", "value"));
|
|
1514
1516
|
|
|
1515
|
-
ASSERT_OK(dbfull()->TEST_FlushMemTable(
|
|
1517
|
+
ASSERT_OK(dbfull()->TEST_FlushMemTable(
|
|
1518
|
+
/*wait=*/false, /*allow_write_stall=*/true, handles_[1]));
|
|
1516
1519
|
|
|
1517
1520
|
ASSERT_OK(db_->Put(WriteOptions(), "foo", "value"));
|
|
1518
|
-
|
|
1521
|
+
|
|
1522
|
+
ASSERT_OK(dbfull()->TEST_FlushMemTable(
|
|
1523
|
+
/*wait=*/false, /*allow_write_stall=*/true, handles_[0]));
|
|
1519
1524
|
|
|
1520
1525
|
bool called = false;
|
|
1521
1526
|
SyncPoint::GetInstance()->DisableProcessing();
|
|
@@ -334,6 +334,41 @@ TEST_P(DBWriteTest, ManualWalFlushInEffect) {
|
|
|
334
334
|
ASSERT_TRUE(dbfull()->TEST_WALBufferIsEmpty());
|
|
335
335
|
}
|
|
336
336
|
|
|
337
|
+
TEST_P(DBWriteTest, UnflushedPutRaceWithTrackedWalSync) {
|
|
338
|
+
// Repro race condition bug where unflushed WAL data extended the synced size
|
|
339
|
+
// recorded to MANIFEST despite being unrecoverable.
|
|
340
|
+
Options options = GetOptions();
|
|
341
|
+
std::unique_ptr<FaultInjectionTestEnv> fault_env(
|
|
342
|
+
new FaultInjectionTestEnv(env_));
|
|
343
|
+
options.env = fault_env.get();
|
|
344
|
+
options.manual_wal_flush = true;
|
|
345
|
+
options.track_and_verify_wals_in_manifest = true;
|
|
346
|
+
Reopen(options);
|
|
347
|
+
|
|
348
|
+
ASSERT_OK(Put("key1", "val1"));
|
|
349
|
+
|
|
350
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
351
|
+
"DBImpl::SyncWAL:Begin",
|
|
352
|
+
[this](void* /* arg */) { ASSERT_OK(Put("key2", "val2")); });
|
|
353
|
+
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
|
|
354
|
+
|
|
355
|
+
ASSERT_OK(db_->FlushWAL(true /* sync */));
|
|
356
|
+
|
|
357
|
+
// Ensure callback ran.
|
|
358
|
+
ASSERT_EQ("val2", Get("key2"));
|
|
359
|
+
|
|
360
|
+
Close();
|
|
361
|
+
|
|
362
|
+
// Simulate full loss of unsynced data. This drops "key2" -> "val2" from the
|
|
363
|
+
// DB WAL.
|
|
364
|
+
fault_env->DropUnsyncedFileData();
|
|
365
|
+
|
|
366
|
+
Reopen(options);
|
|
367
|
+
|
|
368
|
+
// Need to close before `fault_env` goes out of scope.
|
|
369
|
+
Close();
|
|
370
|
+
}
|
|
371
|
+
|
|
337
372
|
TEST_P(DBWriteTest, IOErrorOnWALWriteTriggersReadOnlyMode) {
|
|
338
373
|
std::unique_ptr<FaultInjectionTestEnv> mock_env(
|
|
339
374
|
new FaultInjectionTestEnv(env_));
|
|
@@ -26,7 +26,7 @@ namespace ROCKSDB_NAMESPACE {
|
|
|
26
26
|
// and the value type is embedded as the low 8 bits in the sequence
|
|
27
27
|
// number in internal keys, we need to use the highest-numbered
|
|
28
28
|
// ValueType, not the lowest).
|
|
29
|
-
const ValueType kValueTypeForSeek =
|
|
29
|
+
const ValueType kValueTypeForSeek = kTypeWideColumnEntity;
|
|
30
30
|
const ValueType kValueTypeForSeekForPrev = kTypeDeletion;
|
|
31
31
|
const std::string kDisableUserTimestamp("");
|
|
32
32
|
|
|
@@ -46,6 +46,8 @@ EntryType GetEntryType(ValueType value_type) {
|
|
|
46
46
|
return kEntryRangeDeletion;
|
|
47
47
|
case kTypeBlobIndex:
|
|
48
48
|
return kEntryBlobIndex;
|
|
49
|
+
case kTypeWideColumnEntity:
|
|
50
|
+
return kEntryWideColumnEntity;
|
|
49
51
|
default:
|
|
50
52
|
return kEntryOther;
|
|
51
53
|
}
|
|
@@ -66,7 +66,9 @@ enum ValueType : unsigned char {
|
|
|
66
66
|
kTypeBeginUnprepareXID = 0x13, // WAL only.
|
|
67
67
|
kTypeDeletionWithTimestamp = 0x14,
|
|
68
68
|
kTypeCommitXIDAndTimestamp = 0x15, // WAL only
|
|
69
|
-
|
|
69
|
+
kTypeWideColumnEntity = 0x16,
|
|
70
|
+
kTypeColumnFamilyWideColumnEntity = 0x17, // WAL only
|
|
71
|
+
kMaxValue = 0x7F // Not used for storing records.
|
|
70
72
|
};
|
|
71
73
|
|
|
72
74
|
// Defined in dbformat.cc
|
|
@@ -76,8 +78,8 @@ extern const ValueType kValueTypeForSeekForPrev;
|
|
|
76
78
|
// Checks whether a type is an inline value type
|
|
77
79
|
// (i.e. a type used in memtable skiplist and sst file datablock).
|
|
78
80
|
inline bool IsValueType(ValueType t) {
|
|
79
|
-
return t <= kTypeMerge ||
|
|
80
|
-
kTypeDeletionWithTimestamp == t;
|
|
81
|
+
return t <= kTypeMerge || kTypeSingleDeletion == t || kTypeBlobIndex == t ||
|
|
82
|
+
kTypeDeletionWithTimestamp == t || kTypeWideColumnEntity == t;
|
|
81
83
|
}
|
|
82
84
|
|
|
83
85
|
// Checks whether a type is from user operation
|
|
@@ -128,7 +128,7 @@ class FlushJobTestBase : public testing::Test {
|
|
|
128
128
|
new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
|
|
129
129
|
&write_buffer_manager_, &write_controller_,
|
|
130
130
|
/*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
|
|
131
|
-
/*db_session_id*/ ""));
|
|
131
|
+
/*db_id*/ "", /*db_session_id*/ ""));
|
|
132
132
|
EXPECT_OK(versions_->Recover(column_families, false));
|
|
133
133
|
}
|
|
134
134
|
|
|
@@ -1159,6 +1159,7 @@ Status MemTable::UpdateCallback(SequenceNumber seq, const Slice& key,
|
|
|
1159
1159
|
if (VarintLength(new_prev_size) < VarintLength(prev_size)) {
|
|
1160
1160
|
// shift the value buffer as well.
|
|
1161
1161
|
memcpy(p, prev_buffer, new_prev_size);
|
|
1162
|
+
prev_buffer = p;
|
|
1162
1163
|
}
|
|
1163
1164
|
}
|
|
1164
1165
|
RecordTick(moptions_.statistics, NUMBER_KEYS_UPDATED);
|
|
@@ -103,7 +103,8 @@ class MemTableListTest : public testing::Test {
|
|
|
103
103
|
VersionSet versions(dbname, &immutable_db_options, env_options,
|
|
104
104
|
table_cache.get(), &write_buffer_manager,
|
|
105
105
|
&write_controller, /*block_cache_tracer=*/nullptr,
|
|
106
|
-
/*io_tracer=*/nullptr, /*
|
|
106
|
+
/*io_tracer=*/nullptr, /*db_id*/ "",
|
|
107
|
+
/*db_session_id*/ "");
|
|
107
108
|
std::vector<ColumnFamilyDescriptor> cf_descs;
|
|
108
109
|
cf_descs.emplace_back(kDefaultColumnFamilyName, ColumnFamilyOptions());
|
|
109
110
|
cf_descs.emplace_back("one", ColumnFamilyOptions());
|
|
@@ -153,7 +154,8 @@ class MemTableListTest : public testing::Test {
|
|
|
153
154
|
VersionSet versions(dbname, &immutable_db_options, env_options,
|
|
154
155
|
table_cache.get(), &write_buffer_manager,
|
|
155
156
|
&write_controller, /*block_cache_tracer=*/nullptr,
|
|
156
|
-
/*io_tracer=*/nullptr, /*
|
|
157
|
+
/*io_tracer=*/nullptr, /*db_id*/ "",
|
|
158
|
+
/*db_session_id*/ "");
|
|
157
159
|
std::vector<ColumnFamilyDescriptor> cf_descs;
|
|
158
160
|
cf_descs.emplace_back(kDefaultColumnFamilyName, ColumnFamilyOptions());
|
|
159
161
|
cf_descs.emplace_back("one", ColumnFamilyOptions());
|
|
@@ -122,7 +122,7 @@ class Repairer {
|
|
|
122
122
|
vset_(dbname_, &immutable_db_options_, file_options_,
|
|
123
123
|
raw_table_cache_.get(), &wb_, &wc_,
|
|
124
124
|
/*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
|
|
125
|
-
db_session_id_),
|
|
125
|
+
/*db_id=*/"", db_session_id_),
|
|
126
126
|
next_file_number_(1),
|
|
127
127
|
db_lock_(nullptr),
|
|
128
128
|
closed_(false) {
|
|
@@ -249,6 +249,8 @@ class VersionBuilder::Rep {
|
|
|
249
249
|
bool has_invalid_levels_;
|
|
250
250
|
// Current levels of table files affected by additions/deletions.
|
|
251
251
|
std::unordered_map<uint64_t, int> table_file_levels_;
|
|
252
|
+
// Current compact cursors that should be changed after the last compaction
|
|
253
|
+
std::unordered_map<int, InternalKey> updated_compact_cursors_;
|
|
252
254
|
NewestFirstBySeqNo level_zero_cmp_;
|
|
253
255
|
BySmallestKey level_nonzero_cmp_;
|
|
254
256
|
|
|
@@ -809,6 +811,22 @@ class VersionBuilder::Rep {
|
|
|
809
811
|
return Status::OK();
|
|
810
812
|
}
|
|
811
813
|
|
|
814
|
+
Status ApplyCompactCursors(int level,
|
|
815
|
+
const InternalKey& smallest_uncompacted_key) {
|
|
816
|
+
if (level < 0) {
|
|
817
|
+
std::ostringstream oss;
|
|
818
|
+
oss << "Cannot add compact cursor (" << level << ","
|
|
819
|
+
<< smallest_uncompacted_key.Encode().ToString()
|
|
820
|
+
<< " due to invalid level (level = " << level << ")";
|
|
821
|
+
return Status::Corruption("VersionBuilder", oss.str());
|
|
822
|
+
}
|
|
823
|
+
if (level < num_levels_) {
|
|
824
|
+
// Omit levels (>= num_levels_) when re-open with shrinking num_levels_
|
|
825
|
+
updated_compact_cursors_[level] = smallest_uncompacted_key;
|
|
826
|
+
}
|
|
827
|
+
return Status::OK();
|
|
828
|
+
}
|
|
829
|
+
|
|
812
830
|
// Apply all of the edits in *edit to the current state.
|
|
813
831
|
Status Apply(const VersionEdit* edit) {
|
|
814
832
|
{
|
|
@@ -860,6 +878,16 @@ class VersionBuilder::Rep {
|
|
|
860
878
|
}
|
|
861
879
|
}
|
|
862
880
|
|
|
881
|
+
// Populate compact cursors for round-robin compaction, leave
|
|
882
|
+
// the cursor to be empty to indicate it is invalid
|
|
883
|
+
for (const auto& cursor : edit->GetCompactCursors()) {
|
|
884
|
+
const int level = cursor.first;
|
|
885
|
+
const InternalKey smallest_uncompacted_key = cursor.second;
|
|
886
|
+
const Status s = ApplyCompactCursors(level, smallest_uncompacted_key);
|
|
887
|
+
if (!s.ok()) {
|
|
888
|
+
return s;
|
|
889
|
+
}
|
|
890
|
+
}
|
|
863
891
|
return Status::OK();
|
|
864
892
|
}
|
|
865
893
|
|
|
@@ -1142,12 +1170,24 @@ class VersionBuilder::Rep {
|
|
|
1142
1170
|
}
|
|
1143
1171
|
}
|
|
1144
1172
|
|
|
1173
|
+
void SaveCompactCursorsTo(VersionStorageInfo* vstorage) const {
|
|
1174
|
+
for (auto iter = updated_compact_cursors_.begin();
|
|
1175
|
+
iter != updated_compact_cursors_.end(); iter++) {
|
|
1176
|
+
vstorage->AddCursorForOneLevel(iter->first, iter->second);
|
|
1177
|
+
}
|
|
1178
|
+
}
|
|
1179
|
+
|
|
1145
1180
|
// Save the current state in *vstorage.
|
|
1146
1181
|
Status SaveTo(VersionStorageInfo* vstorage) const {
|
|
1147
|
-
Status s
|
|
1182
|
+
Status s;
|
|
1183
|
+
|
|
1184
|
+
#ifndef NDEBUG
|
|
1185
|
+
// The same check is done within Apply() so we skip it in release mode.
|
|
1186
|
+
s = CheckConsistency(base_vstorage_);
|
|
1148
1187
|
if (!s.ok()) {
|
|
1149
1188
|
return s;
|
|
1150
1189
|
}
|
|
1190
|
+
#endif // NDEBUG
|
|
1151
1191
|
|
|
1152
1192
|
s = CheckConsistency(vstorage);
|
|
1153
1193
|
if (!s.ok()) {
|
|
@@ -1158,6 +1198,8 @@ class VersionBuilder::Rep {
|
|
|
1158
1198
|
|
|
1159
1199
|
SaveBlobFilesTo(vstorage);
|
|
1160
1200
|
|
|
1201
|
+
SaveCompactCursorsTo(vstorage);
|
|
1202
|
+
|
|
1161
1203
|
s = CheckConsistency(vstorage);
|
|
1162
1204
|
return s;
|
|
1163
1205
|
}
|
|
@@ -79,6 +79,7 @@ void VersionEdit::Clear() {
|
|
|
79
79
|
has_max_column_family_ = false;
|
|
80
80
|
has_min_log_number_to_keep_ = false;
|
|
81
81
|
has_last_sequence_ = false;
|
|
82
|
+
compact_cursors_.clear();
|
|
82
83
|
deleted_files_.clear();
|
|
83
84
|
new_files_.clear();
|
|
84
85
|
blob_file_additions_.clear();
|
|
@@ -121,6 +122,13 @@ bool VersionEdit::EncodeTo(std::string* dst) const {
|
|
|
121
122
|
if (has_last_sequence_) {
|
|
122
123
|
PutVarint32Varint64(dst, kLastSequence, last_sequence_);
|
|
123
124
|
}
|
|
125
|
+
for (size_t i = 0; i < compact_cursors_.size(); i++) {
|
|
126
|
+
if (compact_cursors_[i].second.Valid()) {
|
|
127
|
+
PutVarint32(dst, kCompactCursor);
|
|
128
|
+
PutVarint32(dst, compact_cursors_[i].first); // level
|
|
129
|
+
PutLengthPrefixedSlice(dst, compact_cursors_[i].second.Encode());
|
|
130
|
+
}
|
|
131
|
+
}
|
|
124
132
|
for (const auto& deleted : deleted_files_) {
|
|
125
133
|
PutVarint32Varint32Varint64(dst, kDeletedFile, deleted.first /* level */,
|
|
126
134
|
deleted.second /* file number */);
|
|
@@ -512,15 +520,15 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
|
|
|
512
520
|
}
|
|
513
521
|
break;
|
|
514
522
|
|
|
515
|
-
case
|
|
523
|
+
case kCompactCursor:
|
|
516
524
|
if (GetLevel(&input, &level, &msg) &&
|
|
517
525
|
GetInternalKey(&input, &key)) {
|
|
518
|
-
// we
|
|
519
|
-
//
|
|
520
|
-
|
|
526
|
+
// Here we re-use the output format of compact pointer in LevelDB
|
|
527
|
+
// to persist compact_cursors_
|
|
528
|
+
compact_cursors_.push_back(std::make_pair(level, key));
|
|
521
529
|
} else {
|
|
522
530
|
if (!msg) {
|
|
523
|
-
msg = "compaction
|
|
531
|
+
msg = "compaction cursor";
|
|
524
532
|
}
|
|
525
533
|
}
|
|
526
534
|
break;
|