@nxtedition/rocksdb 7.0.0-alpha.7 → 7.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +67 -73
- package/deps/rocksdb/rocksdb/CMakeLists.txt +1 -1
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +3 -1
- package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +2 -0
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +1 -0
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +28 -0
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +5 -2
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +48 -60
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +18 -20
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/c.cc +5 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +20 -0
- package/deps/rocksdb/rocksdb/db/column_family.h +9 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +44 -26
- package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +32 -14
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +73 -44
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +3 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +6 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +10 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +47 -35
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +2 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +54 -32
- package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +426 -61
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_test.cc +102 -24
- package/deps/rocksdb/rocksdb/db/db_test2.cc +159 -30
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +1 -0
- package/deps/rocksdb/rocksdb/db/dbformat.h +1 -1
- package/deps/rocksdb/rocksdb/db/version_builder.cc +39 -10
- package/deps/rocksdb/rocksdb/db/version_builder.h +4 -1
- package/deps/rocksdb/rocksdb/db/version_edit.h +20 -0
- package/deps/rocksdb/rocksdb/db/version_set.cc +2 -1
- package/deps/rocksdb/rocksdb/db/version_set.h +17 -2
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +119 -0
- package/deps/rocksdb/rocksdb/db/write_batch.cc +96 -0
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +4 -0
- package/deps/rocksdb/rocksdb/db/write_thread.cc +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +3 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +9 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +18 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +12 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +1 -1
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +96 -6
- package/deps/rocksdb/rocksdb/env/io_posix.cc +51 -18
- package/deps/rocksdb/rocksdb/env/io_posix.h +2 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +12 -5
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +22 -6
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +99 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +9 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/io_status.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +11 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +4 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +14 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +6 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +12 -1
- package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
- package/deps/rocksdb/rocksdb/options/options.cc +8 -1
- package/deps/rocksdb/rocksdb/options/options_helper.cc +1 -0
- package/deps/rocksdb/rocksdb/options/options_parser.cc +2 -1
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +7 -2
- package/deps/rocksdb/rocksdb/options/options_test.cc +52 -0
- package/deps/rocksdb/rocksdb/port/port_posix.h +10 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +5 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +16 -10
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -4
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +39 -12
- package/deps/rocksdb/rocksdb/util/comparator.cc +10 -0
- package/deps/rocksdb/rocksdb/util/ribbon_alg.h +1 -1
- package/deps/rocksdb/rocksdb/util/xxhash.h +2 -1
- package/index.js +4 -1
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
|
@@ -25,6 +25,49 @@ WriteBatchOpType operator+(WriteBatchOpType lhs, const int rhs) {
|
|
|
25
25
|
return static_cast<WriteBatchOpType>(static_cast<T>(lhs) + rhs);
|
|
26
26
|
}
|
|
27
27
|
|
|
28
|
+
std::pair<WriteBatch, Status> GetWriteBatch(ColumnFamilyHandle* cf_handle,
|
|
29
|
+
WriteBatchOpType op_type) {
|
|
30
|
+
Status s;
|
|
31
|
+
WriteBatch wb(0 /* reserved_bytes */, 0 /* max_bytes */,
|
|
32
|
+
8 /* protection_bytes_per_entry */, 0 /* default_cf_ts_sz */);
|
|
33
|
+
switch (op_type) {
|
|
34
|
+
case WriteBatchOpType::kPut:
|
|
35
|
+
s = wb.Put(cf_handle, "key", "val");
|
|
36
|
+
break;
|
|
37
|
+
case WriteBatchOpType::kDelete:
|
|
38
|
+
s = wb.Delete(cf_handle, "key");
|
|
39
|
+
break;
|
|
40
|
+
case WriteBatchOpType::kSingleDelete:
|
|
41
|
+
s = wb.SingleDelete(cf_handle, "key");
|
|
42
|
+
break;
|
|
43
|
+
case WriteBatchOpType::kDeleteRange:
|
|
44
|
+
s = wb.DeleteRange(cf_handle, "begin", "end");
|
|
45
|
+
break;
|
|
46
|
+
case WriteBatchOpType::kMerge:
|
|
47
|
+
s = wb.Merge(cf_handle, "key", "val");
|
|
48
|
+
break;
|
|
49
|
+
case WriteBatchOpType::kBlobIndex: {
|
|
50
|
+
// TODO(ajkr): use public API once available.
|
|
51
|
+
uint32_t cf_id;
|
|
52
|
+
if (cf_handle == nullptr) {
|
|
53
|
+
cf_id = 0;
|
|
54
|
+
} else {
|
|
55
|
+
cf_id = cf_handle->GetID();
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
std::string blob_index;
|
|
59
|
+
BlobIndex::EncodeInlinedTTL(&blob_index, /* expiration */ 9876543210,
|
|
60
|
+
"val");
|
|
61
|
+
|
|
62
|
+
s = WriteBatchInternal::PutBlobIndex(&wb, cf_id, "key", blob_index);
|
|
63
|
+
break;
|
|
64
|
+
}
|
|
65
|
+
case WriteBatchOpType::kNum:
|
|
66
|
+
assert(false);
|
|
67
|
+
}
|
|
68
|
+
return {std::move(wb), std::move(s)};
|
|
69
|
+
}
|
|
70
|
+
|
|
28
71
|
class DbKvChecksumTest
|
|
29
72
|
: public DBTestBase,
|
|
30
73
|
public ::testing::WithParamInterface<std::tuple<WriteBatchOpType, char>> {
|
|
@@ -35,48 +78,6 @@ class DbKvChecksumTest
|
|
|
35
78
|
corrupt_byte_addend_ = std::get<1>(GetParam());
|
|
36
79
|
}
|
|
37
80
|
|
|
38
|
-
std::pair<WriteBatch, Status> GetWriteBatch(ColumnFamilyHandle* cf_handle) {
|
|
39
|
-
Status s;
|
|
40
|
-
WriteBatch wb(0 /* reserved_bytes */, 0 /* max_bytes */,
|
|
41
|
-
8 /* protection_bytes_per_entry */, 0 /* default_cf_ts_sz */);
|
|
42
|
-
switch (op_type_) {
|
|
43
|
-
case WriteBatchOpType::kPut:
|
|
44
|
-
s = wb.Put(cf_handle, "key", "val");
|
|
45
|
-
break;
|
|
46
|
-
case WriteBatchOpType::kDelete:
|
|
47
|
-
s = wb.Delete(cf_handle, "key");
|
|
48
|
-
break;
|
|
49
|
-
case WriteBatchOpType::kSingleDelete:
|
|
50
|
-
s = wb.SingleDelete(cf_handle, "key");
|
|
51
|
-
break;
|
|
52
|
-
case WriteBatchOpType::kDeleteRange:
|
|
53
|
-
s = wb.DeleteRange(cf_handle, "begin", "end");
|
|
54
|
-
break;
|
|
55
|
-
case WriteBatchOpType::kMerge:
|
|
56
|
-
s = wb.Merge(cf_handle, "key", "val");
|
|
57
|
-
break;
|
|
58
|
-
case WriteBatchOpType::kBlobIndex: {
|
|
59
|
-
// TODO(ajkr): use public API once available.
|
|
60
|
-
uint32_t cf_id;
|
|
61
|
-
if (cf_handle == nullptr) {
|
|
62
|
-
cf_id = 0;
|
|
63
|
-
} else {
|
|
64
|
-
cf_id = cf_handle->GetID();
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
std::string blob_index;
|
|
68
|
-
BlobIndex::EncodeInlinedTTL(&blob_index, /* expiration */ 9876543210,
|
|
69
|
-
"val");
|
|
70
|
-
|
|
71
|
-
s = WriteBatchInternal::PutBlobIndex(&wb, cf_id, "key", blob_index);
|
|
72
|
-
break;
|
|
73
|
-
}
|
|
74
|
-
case WriteBatchOpType::kNum:
|
|
75
|
-
assert(false);
|
|
76
|
-
}
|
|
77
|
-
return {std::move(wb), std::move(s)};
|
|
78
|
-
}
|
|
79
|
-
|
|
80
81
|
void CorruptNextByteCallBack(void* arg) {
|
|
81
82
|
Slice encoded = *static_cast<Slice*>(arg);
|
|
82
83
|
if (entry_len_ == std::numeric_limits<size_t>::max()) {
|
|
@@ -99,34 +100,28 @@ class DbKvChecksumTest
|
|
|
99
100
|
size_t entry_len_ = std::numeric_limits<size_t>::max();
|
|
100
101
|
};
|
|
101
102
|
|
|
102
|
-
std::string
|
|
103
|
-
|
|
104
|
-
std::ostringstream oss;
|
|
105
|
-
switch (std::get<0>(info.param)) {
|
|
103
|
+
std::string GetOpTypeString(const WriteBatchOpType& op_type) {
|
|
104
|
+
switch (op_type) {
|
|
106
105
|
case WriteBatchOpType::kPut:
|
|
107
|
-
|
|
108
|
-
break;
|
|
106
|
+
return "Put";
|
|
109
107
|
case WriteBatchOpType::kDelete:
|
|
110
|
-
|
|
111
|
-
break;
|
|
108
|
+
return "Delete";
|
|
112
109
|
case WriteBatchOpType::kSingleDelete:
|
|
113
|
-
|
|
114
|
-
break;
|
|
110
|
+
return "SingleDelete";
|
|
115
111
|
case WriteBatchOpType::kDeleteRange:
|
|
116
|
-
|
|
112
|
+
return "DeleteRange";
|
|
117
113
|
break;
|
|
118
114
|
case WriteBatchOpType::kMerge:
|
|
119
|
-
|
|
115
|
+
return "Merge";
|
|
120
116
|
break;
|
|
121
117
|
case WriteBatchOpType::kBlobIndex:
|
|
122
|
-
|
|
118
|
+
return "BlobIndex";
|
|
123
119
|
break;
|
|
124
120
|
case WriteBatchOpType::kNum:
|
|
125
121
|
assert(false);
|
|
126
122
|
}
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
return oss.str();
|
|
123
|
+
assert(false);
|
|
124
|
+
return "";
|
|
130
125
|
}
|
|
131
126
|
|
|
132
127
|
INSTANTIATE_TEST_CASE_P(
|
|
@@ -134,7 +129,13 @@ INSTANTIATE_TEST_CASE_P(
|
|
|
134
129
|
::testing::Combine(::testing::Range(static_cast<WriteBatchOpType>(0),
|
|
135
130
|
WriteBatchOpType::kNum),
|
|
136
131
|
::testing::Values(2, 103, 251)),
|
|
137
|
-
|
|
132
|
+
[](const testing::TestParamInfo<std::tuple<WriteBatchOpType, char>>& args) {
|
|
133
|
+
std::ostringstream oss;
|
|
134
|
+
oss << GetOpTypeString(std::get<0>(args.param)) << "Add"
|
|
135
|
+
<< static_cast<int>(
|
|
136
|
+
static_cast<unsigned char>(std::get<1>(args.param)));
|
|
137
|
+
return oss.str();
|
|
138
|
+
});
|
|
138
139
|
|
|
139
140
|
TEST_P(DbKvChecksumTest, MemTableAddCorrupted) {
|
|
140
141
|
// This test repeatedly attempts to write `WriteBatch`es containing a single
|
|
@@ -157,11 +158,16 @@ TEST_P(DbKvChecksumTest, MemTableAddCorrupted) {
|
|
|
157
158
|
Reopen(options);
|
|
158
159
|
|
|
159
160
|
SyncPoint::GetInstance()->EnableProcessing();
|
|
160
|
-
auto batch_and_status = GetWriteBatch(nullptr /* cf_handle
|
|
161
|
+
auto batch_and_status = GetWriteBatch(nullptr /* cf_handle */, op_type_);
|
|
161
162
|
ASSERT_OK(batch_and_status.second);
|
|
162
163
|
ASSERT_TRUE(
|
|
163
164
|
db_->Write(WriteOptions(), &batch_and_status.first).IsCorruption());
|
|
164
165
|
SyncPoint::GetInstance()->DisableProcessing();
|
|
166
|
+
|
|
167
|
+
// In case the above callback is not invoked, this test will run
|
|
168
|
+
// numeric_limits<size_t>::max() times until it reports an error (or will
|
|
169
|
+
// exhaust disk space). Added this assert to report error early.
|
|
170
|
+
ASSERT_TRUE(entry_len_ < std::numeric_limits<size_t>::max());
|
|
165
171
|
}
|
|
166
172
|
}
|
|
167
173
|
|
|
@@ -188,14 +194,373 @@ TEST_P(DbKvChecksumTest, MemTableAddWithColumnFamilyCorrupted) {
|
|
|
188
194
|
ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options);
|
|
189
195
|
|
|
190
196
|
SyncPoint::GetInstance()->EnableProcessing();
|
|
191
|
-
auto batch_and_status = GetWriteBatch(handles_[1]);
|
|
197
|
+
auto batch_and_status = GetWriteBatch(handles_[1], op_type_);
|
|
198
|
+
ASSERT_OK(batch_and_status.second);
|
|
199
|
+
ASSERT_TRUE(
|
|
200
|
+
db_->Write(WriteOptions(), &batch_and_status.first).IsCorruption());
|
|
201
|
+
SyncPoint::GetInstance()->DisableProcessing();
|
|
202
|
+
|
|
203
|
+
// In case the above callback is not invoked, this test will run
|
|
204
|
+
// numeric_limits<size_t>::max() times until it reports an error (or will
|
|
205
|
+
// exhaust disk space). Added this assert to report error early.
|
|
206
|
+
ASSERT_TRUE(entry_len_ < std::numeric_limits<size_t>::max());
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
TEST_P(DbKvChecksumTest, NoCorruptionCase) {
|
|
211
|
+
// If this test fails, we may have found a piece of malfunctioning hardware
|
|
212
|
+
auto batch_and_status = GetWriteBatch(nullptr, op_type_);
|
|
213
|
+
ASSERT_OK(batch_and_status.second);
|
|
214
|
+
ASSERT_OK(batch_and_status.first.VerifyChecksum());
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
TEST_P(DbKvChecksumTest, WriteToWALCorrupted) {
|
|
218
|
+
// This test repeatedly attempts to write `WriteBatch`es containing a single
|
|
219
|
+
// entry of type `op_type_`. Each attempt has one byte corrupted by adding
|
|
220
|
+
// `corrupt_byte_addend_` to its original value. The test repeats until an
|
|
221
|
+
// attempt has been made on each byte in the encoded write batch. All attempts
|
|
222
|
+
// are expected to fail with `Status::Corruption`
|
|
223
|
+
Options options = CurrentOptions();
|
|
224
|
+
if (op_type_ == WriteBatchOpType::kMerge) {
|
|
225
|
+
options.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
226
|
+
}
|
|
227
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
228
|
+
"DBImpl::WriteToWAL:log_entry",
|
|
229
|
+
std::bind(&DbKvChecksumTest::CorruptNextByteCallBack, this,
|
|
230
|
+
std::placeholders::_1));
|
|
231
|
+
// First 8 bytes are for sequence number which is not protected in write batch
|
|
232
|
+
corrupt_byte_offset_ = 8;
|
|
233
|
+
|
|
234
|
+
while (MoreBytesToCorrupt()) {
|
|
235
|
+
// Corrupted write batch leads to read-only mode, so we have to
|
|
236
|
+
// reopen for every attempt.
|
|
237
|
+
Reopen(options);
|
|
238
|
+
auto log_size_pre_write = dbfull()->TEST_total_log_size();
|
|
239
|
+
|
|
240
|
+
SyncPoint::GetInstance()->EnableProcessing();
|
|
241
|
+
auto batch_and_status = GetWriteBatch(nullptr /* cf_handle */, op_type_);
|
|
242
|
+
ASSERT_OK(batch_and_status.second);
|
|
243
|
+
ASSERT_TRUE(
|
|
244
|
+
db_->Write(WriteOptions(), &batch_and_status.first).IsCorruption());
|
|
245
|
+
// Confirm that nothing was written to WAL
|
|
246
|
+
ASSERT_EQ(log_size_pre_write, dbfull()->TEST_total_log_size());
|
|
247
|
+
ASSERT_TRUE(dbfull()->TEST_GetBGError().IsCorruption());
|
|
248
|
+
SyncPoint::GetInstance()->DisableProcessing();
|
|
249
|
+
|
|
250
|
+
// In case the above callback is not invoked, this test will run
|
|
251
|
+
// numeric_limits<size_t>::max() times until it reports an error (or will
|
|
252
|
+
// exhaust disk space). Added this assert to report error early.
|
|
253
|
+
ASSERT_TRUE(entry_len_ < std::numeric_limits<size_t>::max());
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
TEST_P(DbKvChecksumTest, WriteToWALWithColumnFamilyCorrupted) {
|
|
258
|
+
// This test repeatedly attempts to write `WriteBatch`es containing a single
|
|
259
|
+
// entry of type `op_type_`. Each attempt has one byte corrupted by adding
|
|
260
|
+
// `corrupt_byte_addend_` to its original value. The test repeats until an
|
|
261
|
+
// attempt has been made on each byte in the encoded write batch. All attempts
|
|
262
|
+
// are expected to fail with `Status::Corruption`
|
|
263
|
+
Options options = CurrentOptions();
|
|
264
|
+
if (op_type_ == WriteBatchOpType::kMerge) {
|
|
265
|
+
options.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
266
|
+
}
|
|
267
|
+
CreateAndReopenWithCF({"pikachu"}, options);
|
|
268
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
269
|
+
"DBImpl::WriteToWAL:log_entry",
|
|
270
|
+
std::bind(&DbKvChecksumTest::CorruptNextByteCallBack, this,
|
|
271
|
+
std::placeholders::_1));
|
|
272
|
+
// First 8 bytes are for sequence number which is not protected in write batch
|
|
273
|
+
corrupt_byte_offset_ = 8;
|
|
274
|
+
|
|
275
|
+
while (MoreBytesToCorrupt()) {
|
|
276
|
+
// Corrupted write batch leads to read-only mode, so we have to
|
|
277
|
+
// reopen for every attempt.
|
|
278
|
+
ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options);
|
|
279
|
+
auto log_size_pre_write = dbfull()->TEST_total_log_size();
|
|
280
|
+
|
|
281
|
+
SyncPoint::GetInstance()->EnableProcessing();
|
|
282
|
+
auto batch_and_status = GetWriteBatch(handles_[1], op_type_);
|
|
192
283
|
ASSERT_OK(batch_and_status.second);
|
|
193
284
|
ASSERT_TRUE(
|
|
194
285
|
db_->Write(WriteOptions(), &batch_and_status.first).IsCorruption());
|
|
286
|
+
// Confirm that nothing was written to WAL
|
|
287
|
+
ASSERT_EQ(log_size_pre_write, dbfull()->TEST_total_log_size());
|
|
288
|
+
ASSERT_TRUE(dbfull()->TEST_GetBGError().IsCorruption());
|
|
195
289
|
SyncPoint::GetInstance()->DisableProcessing();
|
|
290
|
+
|
|
291
|
+
// In case the above callback is not invoked, this test will run
|
|
292
|
+
// numeric_limits<size_t>::max() times until it reports an error (or will
|
|
293
|
+
// exhaust disk space). Added this assert to report error early.
|
|
294
|
+
ASSERT_TRUE(entry_len_ < std::numeric_limits<size_t>::max());
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
class DbKvChecksumTestMergedBatch
|
|
299
|
+
: public DBTestBase,
|
|
300
|
+
public ::testing::WithParamInterface<
|
|
301
|
+
std::tuple<WriteBatchOpType, WriteBatchOpType, char>> {
|
|
302
|
+
public:
|
|
303
|
+
DbKvChecksumTestMergedBatch()
|
|
304
|
+
: DBTestBase("db_kv_checksum_test", /*env_do_fsync=*/false) {
|
|
305
|
+
op_type1_ = std::get<0>(GetParam());
|
|
306
|
+
op_type2_ = std::get<1>(GetParam());
|
|
307
|
+
corrupt_byte_addend_ = std::get<2>(GetParam());
|
|
196
308
|
}
|
|
309
|
+
|
|
310
|
+
protected:
|
|
311
|
+
WriteBatchOpType op_type1_;
|
|
312
|
+
WriteBatchOpType op_type2_;
|
|
313
|
+
char corrupt_byte_addend_;
|
|
314
|
+
};
|
|
315
|
+
|
|
316
|
+
void CorruptWriteBatch(Slice* content, size_t offset,
|
|
317
|
+
char corrupt_byte_addend) {
|
|
318
|
+
ASSERT_TRUE(offset < content->size());
|
|
319
|
+
char* buf = const_cast<char*>(content->data());
|
|
320
|
+
buf[offset] += corrupt_byte_addend;
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
TEST_P(DbKvChecksumTestMergedBatch, NoCorruptionCase) {
|
|
324
|
+
// Verify write batch checksum after write batch append
|
|
325
|
+
auto batch1 = GetWriteBatch(nullptr /* cf_handle */, op_type1_);
|
|
326
|
+
ASSERT_OK(batch1.second);
|
|
327
|
+
auto batch2 = GetWriteBatch(nullptr /* cf_handle */, op_type2_);
|
|
328
|
+
ASSERT_OK(batch2.second);
|
|
329
|
+
ASSERT_OK(WriteBatchInternal::Append(&batch1.first, &batch2.first));
|
|
330
|
+
ASSERT_OK(batch1.first.VerifyChecksum());
|
|
197
331
|
}
|
|
198
332
|
|
|
333
|
+
TEST_P(DbKvChecksumTestMergedBatch, WriteToWALCorrupted) {
|
|
334
|
+
// This test has two writers repeatedly attempt to write `WriteBatch`es
|
|
335
|
+
// containing a single entry of type op_type1_ and op_type2_ respectively. The
|
|
336
|
+
// leader of the write group writes the batch containing the entry of type
|
|
337
|
+
// op_type1_. One byte of the pre-merged write batches is corrupted by adding
|
|
338
|
+
// `corrupt_byte_addend_` to the batch's original value during each attempt.
|
|
339
|
+
// The test repeats until an attempt has been made on each byte in both
|
|
340
|
+
// pre-merged write batches. All attempts are expected to fail with
|
|
341
|
+
// `Status::Corruption`.
|
|
342
|
+
Options options = CurrentOptions();
|
|
343
|
+
if (op_type1_ == WriteBatchOpType::kMerge ||
|
|
344
|
+
op_type2_ == WriteBatchOpType::kMerge) {
|
|
345
|
+
options.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
auto leader_batch_and_status =
|
|
349
|
+
GetWriteBatch(nullptr /* cf_handle */, op_type1_);
|
|
350
|
+
ASSERT_OK(leader_batch_and_status.second);
|
|
351
|
+
auto follower_batch_and_status =
|
|
352
|
+
GetWriteBatch(nullptr /* cf_handle */, op_type2_);
|
|
353
|
+
size_t leader_batch_size = leader_batch_and_status.first.GetDataSize();
|
|
354
|
+
size_t total_bytes =
|
|
355
|
+
leader_batch_size + follower_batch_and_status.first.GetDataSize();
|
|
356
|
+
// First 8 bytes are for sequence number which is not protected in write batch
|
|
357
|
+
size_t corrupt_byte_offset = 8;
|
|
358
|
+
|
|
359
|
+
std::atomic<bool> follower_joined{false};
|
|
360
|
+
std::atomic<int> leader_count{0};
|
|
361
|
+
port::Thread follower_thread;
|
|
362
|
+
// This callback should only be called by the leader thread
|
|
363
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
364
|
+
"WriteThread::JoinBatchGroup:Wait2", [&](void* arg_leader) {
|
|
365
|
+
auto* leader = reinterpret_cast<WriteThread::Writer*>(arg_leader);
|
|
366
|
+
ASSERT_EQ(leader->state, WriteThread::STATE_GROUP_LEADER);
|
|
367
|
+
|
|
368
|
+
// This callback should only be called by the follower thread
|
|
369
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
370
|
+
"WriteThread::JoinBatchGroup:Wait", [&](void* arg_follower) {
|
|
371
|
+
auto* follower =
|
|
372
|
+
reinterpret_cast<WriteThread::Writer*>(arg_follower);
|
|
373
|
+
// The leader thread will wait on this bool and hence wait until
|
|
374
|
+
// this writer joins the write group
|
|
375
|
+
ASSERT_NE(follower->state, WriteThread::STATE_GROUP_LEADER);
|
|
376
|
+
if (corrupt_byte_offset >= leader_batch_size) {
|
|
377
|
+
Slice batch_content = follower->batch->Data();
|
|
378
|
+
CorruptWriteBatch(&batch_content,
|
|
379
|
+
corrupt_byte_offset - leader_batch_size,
|
|
380
|
+
corrupt_byte_addend_);
|
|
381
|
+
}
|
|
382
|
+
// Leader busy waits on this flag
|
|
383
|
+
follower_joined = true;
|
|
384
|
+
// So the follower does not enter the outer callback at
|
|
385
|
+
// WriteThread::JoinBatchGroup:Wait2
|
|
386
|
+
SyncPoint::GetInstance()->DisableProcessing();
|
|
387
|
+
});
|
|
388
|
+
|
|
389
|
+
// Start the other writer thread which will join the write group as
|
|
390
|
+
// follower
|
|
391
|
+
follower_thread = port::Thread([&]() {
|
|
392
|
+
follower_batch_and_status =
|
|
393
|
+
GetWriteBatch(nullptr /* cf_handle */, op_type2_);
|
|
394
|
+
ASSERT_OK(follower_batch_and_status.second);
|
|
395
|
+
ASSERT_TRUE(
|
|
396
|
+
db_->Write(WriteOptions(), &follower_batch_and_status.first)
|
|
397
|
+
.IsCorruption());
|
|
398
|
+
});
|
|
399
|
+
|
|
400
|
+
ASSERT_EQ(leader->batch->GetDataSize(), leader_batch_size);
|
|
401
|
+
if (corrupt_byte_offset < leader_batch_size) {
|
|
402
|
+
Slice batch_content = leader->batch->Data();
|
|
403
|
+
CorruptWriteBatch(&batch_content, corrupt_byte_offset,
|
|
404
|
+
corrupt_byte_addend_);
|
|
405
|
+
}
|
|
406
|
+
leader_count++;
|
|
407
|
+
while (!follower_joined) {
|
|
408
|
+
// busy waiting
|
|
409
|
+
}
|
|
410
|
+
});
|
|
411
|
+
while (corrupt_byte_offset < total_bytes) {
|
|
412
|
+
// Reopen DB since it failed WAL write which led to read-only mode
|
|
413
|
+
Reopen(options);
|
|
414
|
+
SyncPoint::GetInstance()->EnableProcessing();
|
|
415
|
+
auto log_size_pre_write = dbfull()->TEST_total_log_size();
|
|
416
|
+
leader_batch_and_status = GetWriteBatch(nullptr /* cf_handle */, op_type1_);
|
|
417
|
+
ASSERT_OK(leader_batch_and_status.second);
|
|
418
|
+
ASSERT_TRUE(db_->Write(WriteOptions(), &leader_batch_and_status.first)
|
|
419
|
+
.IsCorruption());
|
|
420
|
+
follower_thread.join();
|
|
421
|
+
// Prevent leader thread from entering this callback
|
|
422
|
+
SyncPoint::GetInstance()->ClearCallBack("WriteThread::JoinBatchGroup:Wait");
|
|
423
|
+
ASSERT_EQ(1, leader_count);
|
|
424
|
+
// Nothing should have been written to WAL
|
|
425
|
+
ASSERT_EQ(log_size_pre_write, dbfull()->TEST_total_log_size());
|
|
426
|
+
ASSERT_TRUE(dbfull()->TEST_GetBGError().IsCorruption());
|
|
427
|
+
|
|
428
|
+
corrupt_byte_offset++;
|
|
429
|
+
if (corrupt_byte_offset == leader_batch_size) {
|
|
430
|
+
// skip over the sequence number part of follower's write batch
|
|
431
|
+
corrupt_byte_offset += 8;
|
|
432
|
+
}
|
|
433
|
+
follower_joined = false;
|
|
434
|
+
leader_count = 0;
|
|
435
|
+
}
|
|
436
|
+
SyncPoint::GetInstance()->DisableProcessing();
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
TEST_P(DbKvChecksumTestMergedBatch, WriteToWALWithColumnFamilyCorrupted) {
|
|
440
|
+
// This test has two writers repeatedly attempt to write `WriteBatch`es
|
|
441
|
+
// containing a single entry of type op_type1_ and op_type2_ respectively. The
|
|
442
|
+
// leader of the write group writes the batch containing the entry of type
|
|
443
|
+
// op_type1_. One byte of the pre-merged write batches is corrupted by adding
|
|
444
|
+
// `corrupt_byte_addend_` to the batch's original value during each attempt.
|
|
445
|
+
// The test repeats until an attempt has been made on each byte in both
|
|
446
|
+
// pre-merged write batches. All attempts are expected to fail with
|
|
447
|
+
// `Status::Corruption`.
|
|
448
|
+
Options options = CurrentOptions();
|
|
449
|
+
if (op_type1_ == WriteBatchOpType::kMerge ||
|
|
450
|
+
op_type2_ == WriteBatchOpType::kMerge) {
|
|
451
|
+
options.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
452
|
+
}
|
|
453
|
+
CreateAndReopenWithCF({"ramen"}, options);
|
|
454
|
+
|
|
455
|
+
auto leader_batch_and_status = GetWriteBatch(handles_[1], op_type1_);
|
|
456
|
+
ASSERT_OK(leader_batch_and_status.second);
|
|
457
|
+
auto follower_batch_and_status = GetWriteBatch(handles_[1], op_type2_);
|
|
458
|
+
size_t leader_batch_size = leader_batch_and_status.first.GetDataSize();
|
|
459
|
+
size_t total_bytes =
|
|
460
|
+
leader_batch_size + follower_batch_and_status.first.GetDataSize();
|
|
461
|
+
// First 8 bytes are for sequence number which is not protected in write batch
|
|
462
|
+
size_t corrupt_byte_offset = 8;
|
|
463
|
+
|
|
464
|
+
std::atomic<bool> follower_joined{false};
|
|
465
|
+
std::atomic<int> leader_count{0};
|
|
466
|
+
port::Thread follower_thread;
|
|
467
|
+
// This callback should only be called by the leader thread
|
|
468
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
469
|
+
"WriteThread::JoinBatchGroup:Wait2", [&](void* arg_leader) {
|
|
470
|
+
auto* leader = reinterpret_cast<WriteThread::Writer*>(arg_leader);
|
|
471
|
+
ASSERT_EQ(leader->state, WriteThread::STATE_GROUP_LEADER);
|
|
472
|
+
|
|
473
|
+
// This callback should only be called by the follower thread
|
|
474
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
475
|
+
"WriteThread::JoinBatchGroup:Wait", [&](void* arg_follower) {
|
|
476
|
+
auto* follower =
|
|
477
|
+
reinterpret_cast<WriteThread::Writer*>(arg_follower);
|
|
478
|
+
// The leader thread will wait on this bool and hence wait until
|
|
479
|
+
// this writer joins the write group
|
|
480
|
+
ASSERT_NE(follower->state, WriteThread::STATE_GROUP_LEADER);
|
|
481
|
+
if (corrupt_byte_offset >= leader_batch_size) {
|
|
482
|
+
Slice batch_content =
|
|
483
|
+
WriteBatchInternal::Contents(follower->batch);
|
|
484
|
+
CorruptWriteBatch(&batch_content,
|
|
485
|
+
corrupt_byte_offset - leader_batch_size,
|
|
486
|
+
corrupt_byte_addend_);
|
|
487
|
+
}
|
|
488
|
+
follower_joined = true;
|
|
489
|
+
// So the follower does not enter the outer callback at
|
|
490
|
+
// WriteThread::JoinBatchGroup:Wait2
|
|
491
|
+
SyncPoint::GetInstance()->DisableProcessing();
|
|
492
|
+
});
|
|
493
|
+
|
|
494
|
+
// Start the other writer thread which will join the write group as
|
|
495
|
+
// follower
|
|
496
|
+
follower_thread = port::Thread([&]() {
|
|
497
|
+
follower_batch_and_status = GetWriteBatch(handles_[1], op_type2_);
|
|
498
|
+
ASSERT_OK(follower_batch_and_status.second);
|
|
499
|
+
ASSERT_TRUE(
|
|
500
|
+
db_->Write(WriteOptions(), &follower_batch_and_status.first)
|
|
501
|
+
.IsCorruption());
|
|
502
|
+
});
|
|
503
|
+
|
|
504
|
+
ASSERT_EQ(leader->batch->GetDataSize(), leader_batch_size);
|
|
505
|
+
if (corrupt_byte_offset < leader_batch_size) {
|
|
506
|
+
Slice batch_content = WriteBatchInternal::Contents(leader->batch);
|
|
507
|
+
CorruptWriteBatch(&batch_content, corrupt_byte_offset,
|
|
508
|
+
corrupt_byte_addend_);
|
|
509
|
+
}
|
|
510
|
+
leader_count++;
|
|
511
|
+
while (!follower_joined) {
|
|
512
|
+
// busy waiting
|
|
513
|
+
}
|
|
514
|
+
});
|
|
515
|
+
SyncPoint::GetInstance()->EnableProcessing();
|
|
516
|
+
while (corrupt_byte_offset < total_bytes) {
|
|
517
|
+
// Reopen DB since it failed WAL write which led to read-only mode
|
|
518
|
+
ReopenWithColumnFamilies({kDefaultColumnFamilyName, "ramen"}, options);
|
|
519
|
+
SyncPoint::GetInstance()->EnableProcessing();
|
|
520
|
+
auto log_size_pre_write = dbfull()->TEST_total_log_size();
|
|
521
|
+
leader_batch_and_status = GetWriteBatch(handles_[1], op_type1_);
|
|
522
|
+
ASSERT_OK(leader_batch_and_status.second);
|
|
523
|
+
ASSERT_TRUE(db_->Write(WriteOptions(), &leader_batch_and_status.first)
|
|
524
|
+
.IsCorruption());
|
|
525
|
+
follower_thread.join();
|
|
526
|
+
// Prevent leader thread from entering this callback
|
|
527
|
+
SyncPoint::GetInstance()->ClearCallBack("WriteThread::JoinBatchGroup:Wait");
|
|
528
|
+
|
|
529
|
+
ASSERT_EQ(1, leader_count);
|
|
530
|
+
// Nothing should have been written to WAL
|
|
531
|
+
ASSERT_EQ(log_size_pre_write, dbfull()->TEST_total_log_size());
|
|
532
|
+
ASSERT_TRUE(dbfull()->TEST_GetBGError().IsCorruption());
|
|
533
|
+
|
|
534
|
+
corrupt_byte_offset++;
|
|
535
|
+
if (corrupt_byte_offset == leader_batch_size) {
|
|
536
|
+
// skip over the sequence number part of follower's write batch
|
|
537
|
+
corrupt_byte_offset += 8;
|
|
538
|
+
}
|
|
539
|
+
follower_joined = false;
|
|
540
|
+
leader_count = 0;
|
|
541
|
+
}
|
|
542
|
+
SyncPoint::GetInstance()->DisableProcessing();
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
INSTANTIATE_TEST_CASE_P(
|
|
546
|
+
DbKvChecksumTestMergedBatch, DbKvChecksumTestMergedBatch,
|
|
547
|
+
::testing::Combine(::testing::Range(static_cast<WriteBatchOpType>(0),
|
|
548
|
+
WriteBatchOpType::kNum),
|
|
549
|
+
::testing::Range(static_cast<WriteBatchOpType>(0),
|
|
550
|
+
WriteBatchOpType::kNum),
|
|
551
|
+
::testing::Values(2, 103, 251)),
|
|
552
|
+
[](const testing::TestParamInfo<
|
|
553
|
+
std::tuple<WriteBatchOpType, WriteBatchOpType, char>>& args) {
|
|
554
|
+
std::ostringstream oss;
|
|
555
|
+
oss << GetOpTypeString(std::get<0>(args.param))
|
|
556
|
+
<< GetOpTypeString(std::get<1>(args.param)) << "Add"
|
|
557
|
+
<< static_cast<int>(
|
|
558
|
+
static_cast<unsigned char>(std::get<2>(args.param)));
|
|
559
|
+
return oss.str();
|
|
560
|
+
});
|
|
561
|
+
|
|
562
|
+
// TODO: add test for transactions
|
|
563
|
+
// TODO: add test for corrupted write batch with WAL disabled
|
|
199
564
|
} // namespace ROCKSDB_NAMESPACE
|
|
200
565
|
|
|
201
566
|
int main(int argc, char** argv) {
|
|
@@ -220,6 +220,7 @@ TEST_F(DBOptionsTest, SetMutableTableOptions) {
|
|
|
220
220
|
|
|
221
221
|
ColumnFamilyHandle* cfh = dbfull()->DefaultColumnFamily();
|
|
222
222
|
Options c_opts = dbfull()->GetOptions(cfh);
|
|
223
|
+
|
|
223
224
|
const auto* c_bbto =
|
|
224
225
|
c_opts.table_factory->GetOptions<BlockBasedTableOptions>();
|
|
225
226
|
ASSERT_NE(c_bbto, nullptr);
|