@nxtedition/rocksdb 13.5.9 → 13.5.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/deps/rocksdb/rocksdb/BUCK +2 -1
- package/deps/rocksdb/rocksdb/CMakeLists.txt +2 -1
- package/deps/rocksdb/rocksdb/Makefile +1 -1
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +4 -5
- package/deps/rocksdb/rocksdb/db/c.cc +13 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +0 -12
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +8 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +2 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +5 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +10 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +11 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +10 -16
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +2 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +164 -0
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +74 -3
- package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +39 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +2 -83
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +0 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +11 -11
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +0 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +0 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +16 -54
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +0 -6
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +186 -0
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +3 -40
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +0 -54
- package/deps/rocksdb/rocksdb/db/db_test.cc +0 -292
- package/deps/rocksdb/rocksdb/db/db_test2.cc +0 -1235
- package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +11 -4
- package/deps/rocksdb/rocksdb/db/log_reader.cc +11 -11
- package/deps/rocksdb/rocksdb/db/merge_helper.h +1 -1
- package/deps/rocksdb/rocksdb/db/multi_scan.cc +70 -0
- package/deps/rocksdb/rocksdb/db/version_set.cc +15 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +4 -0
- package/deps/rocksdb/rocksdb/env/composite_env.cc +4 -0
- package/deps/rocksdb/rocksdb/env/env.cc +4 -0
- package/deps/rocksdb/rocksdb/env/env_encryption.cc +38 -3
- package/deps/rocksdb/rocksdb/env/env_test.cc +36 -1
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +20 -4
- package/deps/rocksdb/rocksdb/env/io_posix.cc +16 -0
- package/deps/rocksdb/rocksdb/env/io_posix.h +3 -0
- package/deps/rocksdb/rocksdb/env/mock_env.cc +5 -0
- package/deps/rocksdb/rocksdb/file/readahead_raf.cc +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +33 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +25 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +10 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +12 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +12 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +29 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +26 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +142 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/options/options_helper.h +3 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +2 -0
- package/deps/rocksdb/rocksdb/port/win/io_win.cc +20 -0
- package/deps/rocksdb/rocksdb/port/win/io_win.h +4 -0
- package/deps/rocksdb/rocksdb/src.mk +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +31 -34
- package/deps/rocksdb/rocksdb/table/block_based/block.h +2 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +43 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +367 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +69 -23
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +54 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +27 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +167 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +6 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +12 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_type.h +1 -0
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +0 -3
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +10 -7
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +244 -0
- package/deps/rocksdb/rocksdb/table/external_table.cc +1 -1
- package/deps/rocksdb/rocksdb/table/format.cc +51 -33
- package/deps/rocksdb/rocksdb/table/format.h +1 -1
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +13 -8
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +1 -3
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +5 -1
- package/deps/rocksdb/rocksdb/table/table_test.cc +629 -1
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +0 -1
- package/deps/rocksdb/rocksdb/test_util/testutil.h +5 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +183 -94
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +71 -0
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +37 -22
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +308 -0
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +189 -0
- package/deps/rocksdb/rocksdb/util/cast_util.h +22 -11
- package/deps/rocksdb/rocksdb/util/coding.h +4 -3
- package/deps/rocksdb/rocksdb/util/compression.cc +2 -0
- package/deps/rocksdb/rocksdb/util/compression.h +16 -6
- package/deps/rocksdb/rocksdb/util/compression_test.cc +1679 -15
- package/deps/rocksdb/rocksdb/util/stop_watch.h +17 -7
- package/deps/rocksdb/rocksdb/util/timer_queue_test.cc +17 -3
- package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +10 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +5 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +2 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +18 -2
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +22 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +5 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +22 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +15 -4
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +61 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +18 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +3 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +9 -3
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +9 -0
- package/deps/rocksdb/rocksdb.gyp +15 -1
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/deps/rocksdb/rocksdb/util/auto_skip_compressor.cc +0 -131
- package/deps/rocksdb/rocksdb/util/auto_skip_compressor.h +0 -90
|
@@ -102,6 +102,7 @@ class StopWatch {
|
|
|
102
102
|
};
|
|
103
103
|
|
|
104
104
|
// a nano second precision stopwatch
|
|
105
|
+
template <bool use_cpu_time = false>
|
|
105
106
|
class StopWatchNano {
|
|
106
107
|
public:
|
|
107
108
|
explicit StopWatchNano(SystemClock* clock, bool auto_start = false)
|
|
@@ -110,27 +111,36 @@ class StopWatchNano {
|
|
|
110
111
|
Start();
|
|
111
112
|
}
|
|
112
113
|
}
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
114
|
+
void Start() {
|
|
115
|
+
if constexpr (use_cpu_time) {
|
|
116
|
+
start_ = clock_->CPUNanos();
|
|
117
|
+
} else {
|
|
118
|
+
start_ = clock_->NowNanos();
|
|
119
|
+
}
|
|
120
|
+
}
|
|
116
121
|
uint64_t ElapsedNanos(bool reset = false) {
|
|
117
|
-
|
|
122
|
+
uint64_t now = 0;
|
|
123
|
+
if constexpr (use_cpu_time) {
|
|
124
|
+
now = clock_->CPUNanos();
|
|
125
|
+
} else {
|
|
126
|
+
now = clock_->NowNanos();
|
|
127
|
+
}
|
|
118
128
|
auto elapsed = now - start_;
|
|
119
129
|
if (reset) {
|
|
120
130
|
start_ = now;
|
|
121
131
|
}
|
|
122
132
|
return elapsed;
|
|
123
133
|
}
|
|
124
|
-
|
|
125
134
|
uint64_t ElapsedNanosSafe(bool reset = false) {
|
|
126
135
|
return (clock_ != nullptr) ? ElapsedNanos(reset) : 0U;
|
|
127
136
|
}
|
|
128
|
-
|
|
129
137
|
bool IsStarted() { return start_ != 0; }
|
|
138
|
+
uint64_t ElapsedMicros(bool reset = false) {
|
|
139
|
+
return ElapsedNanos(reset) / 1000;
|
|
140
|
+
}
|
|
130
141
|
|
|
131
142
|
private:
|
|
132
143
|
SystemClock* clock_;
|
|
133
144
|
uint64_t start_;
|
|
134
145
|
};
|
|
135
|
-
|
|
136
146
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -28,6 +28,10 @@
|
|
|
28
28
|
|
|
29
29
|
#include <future>
|
|
30
30
|
|
|
31
|
+
#include "test_util/testharness.h"
|
|
32
|
+
|
|
33
|
+
namespace ROCKSDB_NAMESPACE {
|
|
34
|
+
|
|
31
35
|
namespace Timing {
|
|
32
36
|
|
|
33
37
|
using Clock = std::chrono::high_resolution_clock;
|
|
@@ -39,7 +43,9 @@ double now() {
|
|
|
39
43
|
|
|
40
44
|
} // namespace Timing
|
|
41
45
|
|
|
42
|
-
|
|
46
|
+
class TimerQueueTest : public testing::Test {};
|
|
47
|
+
|
|
48
|
+
TEST_F(TimerQueueTest, BasicFunctionality) {
|
|
43
49
|
TimerQueue q;
|
|
44
50
|
|
|
45
51
|
double tnow = Timing::now();
|
|
@@ -68,6 +74,14 @@ int main() {
|
|
|
68
74
|
// assert(ret == 1);
|
|
69
75
|
// q.cancelAll();
|
|
70
76
|
|
|
71
|
-
|
|
77
|
+
// Test passes if we can create and add timers without crashing
|
|
78
|
+
ASSERT_TRUE(true);
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
} // namespace ROCKSDB_NAMESPACE
|
|
82
|
+
|
|
83
|
+
int main(int argc, char** argv) {
|
|
84
|
+
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
|
|
85
|
+
::testing::InitGoogleTest(&argc, argv);
|
|
86
|
+
return RUN_ALL_TESTS();
|
|
72
87
|
}
|
|
73
|
-
//////////////////////////////////////////
|
|
@@ -94,6 +94,16 @@ class RandomAccessFileMirror : public RandomAccessFile {
|
|
|
94
94
|
// NOTE: not verified
|
|
95
95
|
return a_->GetUniqueId(id, max_size);
|
|
96
96
|
}
|
|
97
|
+
|
|
98
|
+
Status GetFileSize(uint64_t* file_size) override {
|
|
99
|
+
uint64_t asize = 0, bsize = 0;
|
|
100
|
+
Status as = a_->GetFileSize(&asize);
|
|
101
|
+
Status bs = b_->GetFileSize(&bsize);
|
|
102
|
+
assert(as == bs);
|
|
103
|
+
assert(asize == bsize);
|
|
104
|
+
*file_size = asize;
|
|
105
|
+
return as;
|
|
106
|
+
}
|
|
97
107
|
};
|
|
98
108
|
|
|
99
109
|
class WritableFileMirror : public WritableFile {
|
|
@@ -159,6 +159,11 @@ Status TestRandomAccessFile::MultiRead(ReadRequest* reqs, size_t num_reqs) {
|
|
|
159
159
|
return target_->MultiRead(reqs, num_reqs);
|
|
160
160
|
}
|
|
161
161
|
|
|
162
|
+
Status TestRandomAccessFile::GetFileSize(uint64_t* file_size) {
|
|
163
|
+
assert(target_);
|
|
164
|
+
return target_->GetFileSize(file_size);
|
|
165
|
+
}
|
|
166
|
+
|
|
162
167
|
TestWritableFile::TestWritableFile(const std::string& fname,
|
|
163
168
|
std::unique_ptr<WritableFile>&& f,
|
|
164
169
|
FaultInjectionTestEnv* env)
|
|
@@ -59,6 +59,8 @@ class TestRandomAccessFile : public RandomAccessFile {
|
|
|
59
59
|
|
|
60
60
|
Status MultiRead(ReadRequest* reqs, size_t num_reqs) override;
|
|
61
61
|
|
|
62
|
+
Status GetFileSize(uint64_t* file_size) override;
|
|
63
|
+
|
|
62
64
|
private:
|
|
63
65
|
std::unique_ptr<RandomAccessFile> target_;
|
|
64
66
|
FaultInjectionTestEnv* env_;
|
|
@@ -399,10 +399,10 @@ IOStatus TestFSWritableFile::RangeSync(uint64_t offset, uint64_t nbytes,
|
|
|
399
399
|
return io_s;
|
|
400
400
|
}
|
|
401
401
|
|
|
402
|
-
TestFSRandomRWFile::TestFSRandomRWFile(const std::string&
|
|
402
|
+
TestFSRandomRWFile::TestFSRandomRWFile(const std::string& fname,
|
|
403
403
|
std::unique_ptr<FSRandomRWFile>&& f,
|
|
404
404
|
FaultInjectionTestFS* fs)
|
|
405
|
-
: target_(std::move(f)), file_opened_(true), fs_(fs) {
|
|
405
|
+
: fname_(fname), target_(std::move(f)), file_opened_(true), fs_(fs) {
|
|
406
406
|
assert(target_ != nullptr);
|
|
407
407
|
}
|
|
408
408
|
|
|
@@ -433,6 +433,7 @@ IOStatus TestFSRandomRWFile::Read(uint64_t offset, size_t n,
|
|
|
433
433
|
|
|
434
434
|
IOStatus TestFSRandomRWFile::Close(const IOOptions& options,
|
|
435
435
|
IODebugContext* dbg) {
|
|
436
|
+
fs_->RandomRWFileClosed(fname_);
|
|
436
437
|
if (!fs_->IsFilesystemActive()) {
|
|
437
438
|
return fs_->GetError();
|
|
438
439
|
}
|
|
@@ -562,6 +563,14 @@ size_t TestFSRandomAccessFile::GetUniqueId(char* id, size_t max_size) const {
|
|
|
562
563
|
}
|
|
563
564
|
}
|
|
564
565
|
|
|
566
|
+
IOStatus TestFSRandomAccessFile::GetFileSize(uint64_t* file_size) {
|
|
567
|
+
if (fs_->ShouldFailGetFileSize()) {
|
|
568
|
+
return IOStatus::IOError("GetFileSize failed");
|
|
569
|
+
} else {
|
|
570
|
+
return target_->GetFileSize(file_size);
|
|
571
|
+
}
|
|
572
|
+
}
|
|
573
|
+
|
|
565
574
|
namespace {
|
|
566
575
|
// Modifies `result` to start at the beginning of `scratch` if not already,
|
|
567
576
|
// copying data there if needed.
|
|
@@ -1265,6 +1274,13 @@ IOStatus FaultInjectionTestFS::AbortIO(std::vector<void*>& io_handles) {
|
|
|
1265
1274
|
return target()->AbortIO(io_handles);
|
|
1266
1275
|
}
|
|
1267
1276
|
|
|
1277
|
+
void FaultInjectionTestFS::RandomRWFileClosed(const std::string& fname) {
|
|
1278
|
+
MutexLock l(&mutex_);
|
|
1279
|
+
if (open_managed_files_.find(fname) != open_managed_files_.end()) {
|
|
1280
|
+
open_managed_files_.erase(fname);
|
|
1281
|
+
}
|
|
1282
|
+
}
|
|
1283
|
+
|
|
1268
1284
|
void FaultInjectionTestFS::WritableFileClosed(const FSFileState& state) {
|
|
1269
1285
|
MutexLock l(&mutex_);
|
|
1270
1286
|
if (open_managed_files_.find(state.filename_) != open_managed_files_.end()) {
|
|
@@ -106,8 +106,8 @@ class TestFSWritableFile : public FSWritableFile {
|
|
|
106
106
|
const bool unsync_data_loss_;
|
|
107
107
|
};
|
|
108
108
|
|
|
109
|
-
// A wrapper around
|
|
110
|
-
// is
|
|
109
|
+
// A wrapper around FSRandomRWFile* file
|
|
110
|
+
// is read from/write to or sync'ed.
|
|
111
111
|
class TestFSRandomRWFile : public FSRandomRWFile {
|
|
112
112
|
public:
|
|
113
113
|
explicit TestFSRandomRWFile(const std::string& fname,
|
|
@@ -128,6 +128,9 @@ class TestFSRandomRWFile : public FSRandomRWFile {
|
|
|
128
128
|
bool use_direct_io() const override { return target_->use_direct_io(); }
|
|
129
129
|
|
|
130
130
|
private:
|
|
131
|
+
// keep a copy of file name, so we can untrack it in File system, when it is
|
|
132
|
+
// closed
|
|
133
|
+
std::string fname_;
|
|
131
134
|
std::unique_ptr<FSRandomRWFile> target_;
|
|
132
135
|
bool file_opened_;
|
|
133
136
|
FaultInjectionTestFS* fs_;
|
|
@@ -155,6 +158,8 @@ class TestFSRandomAccessFile : public FSRandomAccessFile {
|
|
|
155
158
|
|
|
156
159
|
size_t GetUniqueId(char* id, size_t max_size) const override;
|
|
157
160
|
|
|
161
|
+
IOStatus GetFileSize(uint64_t* file_size) override;
|
|
162
|
+
|
|
158
163
|
private:
|
|
159
164
|
std::unique_ptr<FSRandomAccessFile> target_;
|
|
160
165
|
FaultInjectionTestFS* fs_;
|
|
@@ -218,7 +223,8 @@ class FaultInjectionTestFS : public FileSystemWrapper {
|
|
|
218
223
|
DeleteThreadLocalErrorContext),
|
|
219
224
|
ingest_data_corruption_before_write_(false),
|
|
220
225
|
checksum_handoff_func_type_(kCRC32c),
|
|
221
|
-
fail_get_file_unique_id_(false)
|
|
226
|
+
fail_get_file_unique_id_(false),
|
|
227
|
+
fail_get_file_size_(false) {}
|
|
222
228
|
virtual ~FaultInjectionTestFS() override { fs_error_.PermitUncheckedError(); }
|
|
223
229
|
|
|
224
230
|
static const char* kClassName() { return "FaultInjectionTestFS"; }
|
|
@@ -338,6 +344,8 @@ class FaultInjectionTestFS : public FileSystemWrapper {
|
|
|
338
344
|
|
|
339
345
|
void WritableFileAppended(const FSFileState& state);
|
|
340
346
|
|
|
347
|
+
void RandomRWFileClosed(const std::string& fname);
|
|
348
|
+
|
|
341
349
|
IOStatus DropUnsyncedFileData();
|
|
342
350
|
|
|
343
351
|
IOStatus DropRandomUnsyncedFileData(Random* rnd);
|
|
@@ -477,6 +485,16 @@ class FaultInjectionTestFS : public FileSystemWrapper {
|
|
|
477
485
|
return fail_get_file_unique_id_;
|
|
478
486
|
}
|
|
479
487
|
|
|
488
|
+
void SetFailGetFileSize(bool flag) {
|
|
489
|
+
MutexLock l(&mutex_);
|
|
490
|
+
fail_get_file_size_ = flag;
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
bool ShouldFailGetFileSize() {
|
|
494
|
+
MutexLock l(&mutex_);
|
|
495
|
+
return fail_get_file_size_;
|
|
496
|
+
}
|
|
497
|
+
|
|
480
498
|
// Specify what the operation, so we can inject the right type of error
|
|
481
499
|
enum ErrorOperation : char {
|
|
482
500
|
kRead = 0,
|
|
@@ -636,6 +654,7 @@ class FaultInjectionTestFS : public FileSystemWrapper {
|
|
|
636
654
|
bool ingest_data_corruption_before_write_;
|
|
637
655
|
ChecksumType checksum_handoff_func_type_;
|
|
638
656
|
bool fail_get_file_unique_id_;
|
|
657
|
+
bool fail_get_file_size_;
|
|
639
658
|
|
|
640
659
|
// Inject an error. For a READ operation, a status of IOError(), a
|
|
641
660
|
// corruption in the contents of scratch, or truncation of slice
|
|
@@ -359,6 +359,11 @@ Status PointLockManager::AcquireWithTimeout(
|
|
|
359
359
|
|
|
360
360
|
stripe->stripe_mutex->UnLock();
|
|
361
361
|
|
|
362
|
+
// On timeout, persist the lock information so we can debug the contention
|
|
363
|
+
if (result.IsTimedOut()) {
|
|
364
|
+
txn->SetWaitingTxn(wait_ids, column_family_id, &key, true);
|
|
365
|
+
}
|
|
366
|
+
|
|
362
367
|
return result;
|
|
363
368
|
}
|
|
364
369
|
|
|
@@ -898,10 +898,30 @@ Status WriteCommittedTxn::CommitInternal() {
|
|
|
898
898
|
if (!needs_ts) {
|
|
899
899
|
if (commit_bypass_memtable_threshold_ &&
|
|
900
900
|
wb_count >= commit_bypass_memtable_threshold_) {
|
|
901
|
-
|
|
901
|
+
if (wbwi->GetWBWIOpCount() != wb_count) {
|
|
902
|
+
ROCKS_LOG_WARN(
|
|
903
|
+
db_impl_->immutable_db_options().info_log,
|
|
904
|
+
"Transaction %s qualifies for commit optimization due to update "
|
|
905
|
+
"count. However, it will commit normally due to wbwi and wb record "
|
|
906
|
+
"count mismatch. Some updates were added directly to the "
|
|
907
|
+
"transaction's underlying write batch.",
|
|
908
|
+
GetName().c_str());
|
|
909
|
+
} else {
|
|
910
|
+
bypass_memtable = true;
|
|
911
|
+
}
|
|
902
912
|
} else if (commit_bypass_memtable_byte_threshold_ &&
|
|
903
913
|
wb->GetDataSize() >= commit_bypass_memtable_byte_threshold_) {
|
|
904
|
-
|
|
914
|
+
if (wbwi->GetWBWIOpCount() != wb_count) {
|
|
915
|
+
ROCKS_LOG_WARN(
|
|
916
|
+
db_impl_->immutable_db_options().info_log,
|
|
917
|
+
"Transaction %s qualifies for commit optimization due to write "
|
|
918
|
+
"batch size. However, it will commit normally due to wbwi and wb "
|
|
919
|
+
"record count mismatch. Some updates were added directly to the "
|
|
920
|
+
"transaction's underlying write batch.",
|
|
921
|
+
GetName().c_str());
|
|
922
|
+
} else {
|
|
923
|
+
bypass_memtable = true;
|
|
924
|
+
}
|
|
905
925
|
}
|
|
906
926
|
}
|
|
907
927
|
if (!bypass_memtable) {
|
|
@@ -71,18 +71,26 @@ class PessimisticTransaction : public TransactionBaseImpl {
|
|
|
71
71
|
std::string* key) const override {
|
|
72
72
|
std::lock_guard<std::mutex> lock(wait_mutex_);
|
|
73
73
|
std::vector<TransactionID> ids(waiting_txn_ids_.size());
|
|
74
|
-
if (
|
|
74
|
+
if (timed_out_key_.has_value()) {
|
|
75
|
+
if (key) *key = timed_out_key_.value();
|
|
76
|
+
} else {
|
|
77
|
+
if (key) *key = waiting_key_ ? *waiting_key_ : "";
|
|
78
|
+
}
|
|
75
79
|
if (column_family_id) *column_family_id = waiting_cf_id_;
|
|
76
80
|
std::copy(waiting_txn_ids_.begin(), waiting_txn_ids_.end(), ids.begin());
|
|
77
81
|
return ids;
|
|
78
82
|
}
|
|
79
83
|
|
|
80
84
|
void SetWaitingTxn(autovector<TransactionID> ids, uint32_t column_family_id,
|
|
81
|
-
const std::string* key) {
|
|
85
|
+
const std::string* key, bool is_timed_out = false) {
|
|
82
86
|
std::lock_guard<std::mutex> lock(wait_mutex_);
|
|
83
87
|
waiting_txn_ids_ = ids;
|
|
84
88
|
waiting_cf_id_ = column_family_id;
|
|
85
|
-
|
|
89
|
+
if (is_timed_out) {
|
|
90
|
+
timed_out_key_ = key ? *key : "";
|
|
91
|
+
} else {
|
|
92
|
+
waiting_key_ = key;
|
|
93
|
+
}
|
|
86
94
|
}
|
|
87
95
|
|
|
88
96
|
void ClearWaitingTxn() {
|
|
@@ -182,7 +190,7 @@ class PessimisticTransaction : public TransactionBaseImpl {
|
|
|
182
190
|
|
|
183
191
|
// IDs for the transactions that are blocking the current transaction.
|
|
184
192
|
//
|
|
185
|
-
// empty if current transaction is not waiting
|
|
193
|
+
// empty if current transaction is not waiting or has timed out
|
|
186
194
|
autovector<TransactionID> waiting_txn_ids_;
|
|
187
195
|
|
|
188
196
|
// The following two represents the (cf, key) that a transaction is waiting
|
|
@@ -196,6 +204,9 @@ class PessimisticTransaction : public TransactionBaseImpl {
|
|
|
196
204
|
uint32_t waiting_cf_id_;
|
|
197
205
|
const std::string* waiting_key_;
|
|
198
206
|
|
|
207
|
+
// Waiting key with lifetime of the txn so it can be accessed after timeouts
|
|
208
|
+
std::optional<std::string> timed_out_key_;
|
|
209
|
+
|
|
199
210
|
// Mutex protecting waiting_txn_ids_, waiting_cf_id_ and waiting_key_.
|
|
200
211
|
mutable std::mutex wait_mutex_;
|
|
201
212
|
|
|
@@ -561,6 +561,16 @@ TEST_P(TransactionTest, WaitingTxn) {
|
|
|
561
561
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
|
|
562
562
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
|
|
563
563
|
|
|
564
|
+
// We expect GetWaitingTxns still returns the waiting values as it would
|
|
565
|
+
// normally before timeout
|
|
566
|
+
std::string key;
|
|
567
|
+
uint32_t cf_id;
|
|
568
|
+
std::vector<TransactionID> wait = txn2->GetWaitingTxns(&cf_id, &key);
|
|
569
|
+
ASSERT_EQ(key, "foo");
|
|
570
|
+
ASSERT_EQ(wait.size(), 1);
|
|
571
|
+
ASSERT_EQ(wait[0], id1);
|
|
572
|
+
ASSERT_EQ(cf_id, 0U);
|
|
573
|
+
|
|
564
574
|
delete cfa;
|
|
565
575
|
delete txn1;
|
|
566
576
|
delete txn2;
|
|
@@ -9929,6 +9939,57 @@ TEST_P(CommitBypassMemtableTest,
|
|
|
9929
9939
|
|
|
9930
9940
|
delete txn_cf;
|
|
9931
9941
|
}
|
|
9942
|
+
|
|
9943
|
+
TEST_P(CommitBypassMemtableTest, WBWIOpCountMismatchWBCount) {
|
|
9944
|
+
// Tests that large txn optimization checks op count in WBWI vs WB. When an
|
|
9945
|
+
// update is written directly to a transaction's underlying write batch, the
|
|
9946
|
+
// optimization should not apply.
|
|
9947
|
+
SetUpTransactionDB();
|
|
9948
|
+
bool commit_bypass_memtable = false;
|
|
9949
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
9950
|
+
"WriteCommittedTxn::CommitInternal:bypass_memtable",
|
|
9951
|
+
[&](void* arg) { commit_bypass_memtable = *(static_cast<bool*>(arg)); });
|
|
9952
|
+
SyncPoint::GetInstance()->EnableProcessing();
|
|
9953
|
+
|
|
9954
|
+
Random rnd(301);
|
|
9955
|
+
{
|
|
9956
|
+
WriteOptions wopts;
|
|
9957
|
+
TransactionOptions txn_opts;
|
|
9958
|
+
txn_opts.large_txn_commit_optimize_byte_threshold = 100;
|
|
9959
|
+
auto txn = txn_db->BeginTransaction(wopts, txn_opts, nullptr);
|
|
9960
|
+
ASSERT_OK(txn->SetName("xid0"));
|
|
9961
|
+
ASSERT_OK(txn->Put("k1", rnd.RandomString(1000)));
|
|
9962
|
+
// This update is written directly to the underlying write batch, so the
|
|
9963
|
+
// optimization should not apply.
|
|
9964
|
+
ASSERT_OK(txn->GetWriteBatch()->GetWriteBatch()->Put("meta", "1"));
|
|
9965
|
+
ASSERT_OK(txn->Prepare());
|
|
9966
|
+
ASSERT_OK(txn->Commit());
|
|
9967
|
+
ASSERT_FALSE(commit_bypass_memtable);
|
|
9968
|
+
|
|
9969
|
+
ASSERT_EQ(Get("meta"), "1");
|
|
9970
|
+
delete txn;
|
|
9971
|
+
}
|
|
9972
|
+
|
|
9973
|
+
{
|
|
9974
|
+
WriteOptions wopts;
|
|
9975
|
+
TransactionOptions txn_opts;
|
|
9976
|
+
txn_opts.large_txn_commit_optimize_threshold = 10;
|
|
9977
|
+
auto txn = txn_db->BeginTransaction(wopts, txn_opts, nullptr);
|
|
9978
|
+
ASSERT_OK(txn->SetName("xid0"));
|
|
9979
|
+
for (int i = 0; i < 10; ++i) {
|
|
9980
|
+
ASSERT_OK(txn->Put(Key(i), rnd.RandomString(10)));
|
|
9981
|
+
}
|
|
9982
|
+
// This update is written directly to the underlying write batch, so the
|
|
9983
|
+
// optimization should not apply.
|
|
9984
|
+
ASSERT_OK(txn->GetWriteBatch()->GetWriteBatch()->Put("meta", "2"));
|
|
9985
|
+
ASSERT_OK(txn->Prepare());
|
|
9986
|
+
ASSERT_OK(txn->Commit());
|
|
9987
|
+
ASSERT_FALSE(commit_bypass_memtable);
|
|
9988
|
+
|
|
9989
|
+
ASSERT_EQ(Get("meta"), "2");
|
|
9990
|
+
delete txn;
|
|
9991
|
+
}
|
|
9992
|
+
}
|
|
9932
9993
|
} // namespace ROCKSDB_NAMESPACE
|
|
9933
9994
|
|
|
9934
9995
|
int main(int argc, char** argv) {
|
|
@@ -635,4 +635,22 @@ void DBWithTTLImpl::SetTtl(ColumnFamilyHandle* h, int32_t ttl) {
|
|
|
635
635
|
filter->SetTtl(ttl);
|
|
636
636
|
}
|
|
637
637
|
|
|
638
|
+
Status DBWithTTLImpl::GetTtl(ColumnFamilyHandle* h, int32_t* ttl) {
|
|
639
|
+
if (h == nullptr || ttl == nullptr) {
|
|
640
|
+
return Status::InvalidArgument(
|
|
641
|
+
"column family handle or ttl cannot be null");
|
|
642
|
+
}
|
|
643
|
+
std::shared_ptr<TtlCompactionFilterFactory> filter;
|
|
644
|
+
Options opts;
|
|
645
|
+
opts = GetOptions(h);
|
|
646
|
+
filter = std::static_pointer_cast<TtlCompactionFilterFactory>(
|
|
647
|
+
opts.compaction_filter_factory);
|
|
648
|
+
if (!filter) {
|
|
649
|
+
return Status::InvalidArgument(
|
|
650
|
+
"TTLCompactionFilterFactory is not set for TTLDB");
|
|
651
|
+
}
|
|
652
|
+
*ttl = filter->GetTtl();
|
|
653
|
+
return Status::OK();
|
|
654
|
+
}
|
|
655
|
+
|
|
638
656
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -100,6 +100,8 @@ class DBWithTTLImpl : public DBWithTTL {
|
|
|
100
100
|
|
|
101
101
|
void SetTtl(ColumnFamilyHandle* h, int32_t ttl) override;
|
|
102
102
|
|
|
103
|
+
Status GetTtl(ColumnFamilyHandle* h, int32_t* ttl) override;
|
|
104
|
+
|
|
103
105
|
private:
|
|
104
106
|
// remember whether the Close completes or not
|
|
105
107
|
bool closed_;
|
|
@@ -184,6 +186,7 @@ class TtlCompactionFilterFactory : public CompactionFilterFactory {
|
|
|
184
186
|
std::unique_ptr<CompactionFilter> CreateCompactionFilter(
|
|
185
187
|
const CompactionFilter::Context& context) override;
|
|
186
188
|
void SetTtl(int32_t ttl) { ttl_ = ttl; }
|
|
189
|
+
int32_t GetTtl() { return ttl_; }
|
|
187
190
|
|
|
188
191
|
const char* Name() const override { return kClassName(); }
|
|
189
192
|
static const char* kClassName() { return "TtlCompactionFilterFactory"; }
|
|
@@ -720,6 +720,9 @@ TEST_F(TtlTest, ChangeTtlOnOpenDb) {
|
|
|
720
720
|
|
|
721
721
|
OpenTtl(1); // T=0:Open the db with ttl = 2
|
|
722
722
|
SetTtl(3);
|
|
723
|
+
int32_t ttl = 0;
|
|
724
|
+
ASSERT_OK(db_ttl_->GetTtl(db_ttl_->DefaultColumnFamily(), &ttl));
|
|
725
|
+
ASSERT_EQ(ttl, 3);
|
|
723
726
|
PutValues(0, kSampleSize_); // T=0:Insert Set1. Delete at t=2
|
|
724
727
|
SleepCompactCheck(2, 0, kSampleSize_, true); // T=2:Set1 should be there
|
|
725
728
|
CloseTtl();
|
|
@@ -32,7 +32,8 @@ struct WriteBatchWithIndex::Rep {
|
|
|
32
32
|
skip_list(comparator, &arena),
|
|
33
33
|
last_sub_batch_offset(0),
|
|
34
34
|
sub_batch_cnt(1),
|
|
35
|
-
overwrite_key(_overwrite_key)
|
|
35
|
+
overwrite_key(_overwrite_key),
|
|
36
|
+
op_count(0) {}
|
|
36
37
|
ReadableWriteBatch write_batch;
|
|
37
38
|
WriteBatchEntryComparator comparator;
|
|
38
39
|
Arena arena;
|
|
@@ -48,7 +49,8 @@ struct WriteBatchWithIndex::Rep {
|
|
|
48
49
|
// Tracks ids of CFs that have updates in this WBWI, number of updates and
|
|
49
50
|
// number of overwritten single deletions per cf. Useful for WBWIMemTable
|
|
50
51
|
// when this WBWI is ingested into a DB.
|
|
51
|
-
std::unordered_map<uint32_t, CFStat> cf_id_to_stat;
|
|
52
|
+
std::unordered_map<uint32_t, WriteBatchWithIndex::CFStat> cf_id_to_stat;
|
|
53
|
+
size_t op_count;
|
|
52
54
|
|
|
53
55
|
// In overwrite mode, find the existing entry for the same key and update it
|
|
54
56
|
// to point to the current entry if this is not a Merge operation.
|
|
@@ -154,6 +156,7 @@ bool WriteBatchWithIndex::Rep::UpdateExistingEntryWithCfId(
|
|
|
154
156
|
void WriteBatchWithIndex::Rep::AddOrUpdateIndexWithCfId(
|
|
155
157
|
uint32_t cf_id, const Slice& key, WriteType type, size_t last_entry_offset,
|
|
156
158
|
const Comparator* cf_cmp) {
|
|
159
|
+
op_count++;
|
|
157
160
|
uint32_t update_count = 0;
|
|
158
161
|
if (!UpdateExistingEntryWithCfId(cf_id, key, type, last_entry_offset,
|
|
159
162
|
&update_count)) {
|
|
@@ -201,7 +204,6 @@ void WriteBatchWithIndex::Rep::AddNewEntry(uint32_t column_family_id,
|
|
|
201
204
|
|
|
202
205
|
void WriteBatchWithIndex::Rep::Clear() {
|
|
203
206
|
write_batch.Clear();
|
|
204
|
-
cf_id_to_stat.clear();
|
|
205
207
|
ClearIndex();
|
|
206
208
|
}
|
|
207
209
|
|
|
@@ -212,6 +214,8 @@ void WriteBatchWithIndex::Rep::ClearIndex() {
|
|
|
212
214
|
new (&skip_list) WriteBatchEntrySkipList(comparator, &arena);
|
|
213
215
|
last_sub_batch_offset = 0;
|
|
214
216
|
sub_batch_cnt = 1;
|
|
217
|
+
cf_id_to_stat.clear();
|
|
218
|
+
op_count = 0;
|
|
215
219
|
}
|
|
216
220
|
|
|
217
221
|
Status WriteBatchWithIndex::Rep::ReBuildIndex() {
|
|
@@ -1173,5 +1177,7 @@ WriteBatchWithIndex::GetCFStats() const {
|
|
|
1173
1177
|
return rep->cf_id_to_stat;
|
|
1174
1178
|
}
|
|
1175
1179
|
|
|
1180
|
+
size_t WriteBatchWithIndex::GetWBWIOpCount() const { return rep->op_count; }
|
|
1181
|
+
|
|
1176
1182
|
bool WriteBatchWithIndex::GetOverwriteKey() const { return rep->overwrite_key; }
|
|
1177
1183
|
} // namespace ROCKSDB_NAMESPACE
|
package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc
CHANGED
|
@@ -342,6 +342,10 @@ void AssertIterEqual(WBWIIteratorImpl* wbwii,
|
|
|
342
342
|
}
|
|
343
343
|
ASSERT_FALSE(wbwii->Valid());
|
|
344
344
|
}
|
|
345
|
+
|
|
346
|
+
void AssertWBWICountEQWBCount(WriteBatchWithIndex& wbwi) {
|
|
347
|
+
ASSERT_EQ(wbwi.GetWBWIOpCount(), wbwi.GetWriteBatch()->Count());
|
|
348
|
+
}
|
|
345
349
|
} // namespace
|
|
346
350
|
|
|
347
351
|
class WBWIBaseTest : public testing::Test {
|
|
@@ -356,6 +360,8 @@ class WBWIBaseTest : public testing::Test {
|
|
|
356
360
|
}
|
|
357
361
|
|
|
358
362
|
virtual ~WBWIBaseTest() {
|
|
363
|
+
AssertWBWICountEQWBCount(*batch_);
|
|
364
|
+
|
|
359
365
|
if (db_ != nullptr) {
|
|
360
366
|
ReleaseSnapshot();
|
|
361
367
|
delete db_;
|
|
@@ -715,6 +721,7 @@ TEST_P(WriteBatchWithIndexTest, TestValueAsSecondaryIndex) {
|
|
|
715
721
|
batch_.reset(new WriteBatchWithIndex(nullptr, 20, GetParam()));
|
|
716
722
|
|
|
717
723
|
TestValueAsSecondaryIndexHelper(entries_list, batch_.get(), GetParam());
|
|
724
|
+
AssertWBWICountEQWBCount(*batch_);
|
|
718
725
|
|
|
719
726
|
// Clear batch and re-run test with new values
|
|
720
727
|
batch_->Clear();
|
|
@@ -729,6 +736,7 @@ TEST_P(WriteBatchWithIndexTest, TestValueAsSecondaryIndex) {
|
|
|
729
736
|
entries_list = std::vector<Entry>(new_entries, new_entries + 8);
|
|
730
737
|
|
|
731
738
|
TestValueAsSecondaryIndexHelper(entries_list, batch_.get(), GetParam());
|
|
739
|
+
AssertWBWICountEQWBCount(*batch_);
|
|
732
740
|
}
|
|
733
741
|
|
|
734
742
|
TEST_P(WriteBatchWithIndexTest, WBWIIteratorImpl) {
|
|
@@ -3816,6 +3824,7 @@ TEST_F(WBWIMemTableTest, ReadFromWBWIMemtable) {
|
|
|
3816
3824
|
// See comment for WBWIMemTable for sequence number assignment method.
|
|
3817
3825
|
expected_seqno[idx]++;
|
|
3818
3826
|
}
|
|
3827
|
+
AssertWBWICountEQWBCount(*wbwi);
|
|
3819
3828
|
// Get a non-existing key
|
|
3820
3829
|
found_final_value = false;
|
|
3821
3830
|
ASSERT_EQ("NOT_FOUND", Get("foo", wbwi_mem, visible_seq, &found_final_value));
|
package/deps/rocksdb/rocksdb.gyp
CHANGED
|
@@ -231,6 +231,7 @@
|
|
|
231
231
|
"rocksdb/db/memtable_list.cc",
|
|
232
232
|
"rocksdb/db/merge_helper.cc",
|
|
233
233
|
"rocksdb/db/merge_operator.cc",
|
|
234
|
+
"rocksdb/db/multi_scan.cc",
|
|
234
235
|
"rocksdb/db/output_validator.cc",
|
|
235
236
|
"rocksdb/db/periodic_task_scheduler.cc",
|
|
236
237
|
"rocksdb/db/range_del_aggregator.cc",
|
|
@@ -384,7 +385,7 @@
|
|
|
384
385
|
"rocksdb/trace_replay/trace_record.cc",
|
|
385
386
|
"rocksdb/trace_replay/trace_replay.cc",
|
|
386
387
|
"rocksdb/util/async_file_reader.cc",
|
|
387
|
-
"rocksdb/util/
|
|
388
|
+
"rocksdb/util/auto_tune_compressor.cc",
|
|
388
389
|
"rocksdb/util/cleanable.cc",
|
|
389
390
|
"rocksdb/util/coding.cc",
|
|
390
391
|
"rocksdb/util/compaction_job_stats_impl.cc",
|
|
@@ -483,6 +484,19 @@
|
|
|
483
484
|
"rocksdb/utilities/wal_filter.cc",
|
|
484
485
|
"rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc",
|
|
485
486
|
"rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc",
|
|
487
|
+
"rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc",
|
|
488
|
+
"rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.cc",
|
|
489
|
+
"rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc",
|
|
490
|
+
"rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc",
|
|
491
|
+
"rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/manager.cc",
|
|
492
|
+
"rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.cc",
|
|
493
|
+
"rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/treenode.cc",
|
|
494
|
+
"rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.cc",
|
|
495
|
+
"rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/wfg.cc",
|
|
496
|
+
"rocksdb/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc",
|
|
497
|
+
"rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc",
|
|
498
|
+
"rocksdb/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc",
|
|
499
|
+
|
|
486
500
|
"build_version.cc",
|
|
487
501
|
]
|
|
488
502
|
}
|
package/package.json
CHANGED
|
Binary file
|
|
Binary file
|