@nxtedition/rocksdb 13.5.9 → 13.5.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/deps/rocksdb/rocksdb/BUCK +2 -1
- package/deps/rocksdb/rocksdb/CMakeLists.txt +2 -1
- package/deps/rocksdb/rocksdb/Makefile +1 -1
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +4 -5
- package/deps/rocksdb/rocksdb/db/c.cc +13 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +0 -12
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +8 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +2 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +5 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +10 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +11 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +10 -16
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +2 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +164 -0
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +74 -3
- package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +39 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +2 -83
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +0 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +11 -11
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +0 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +0 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +16 -54
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +0 -6
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +186 -0
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +3 -40
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +0 -54
- package/deps/rocksdb/rocksdb/db/db_test.cc +0 -292
- package/deps/rocksdb/rocksdb/db/db_test2.cc +0 -1235
- package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +11 -4
- package/deps/rocksdb/rocksdb/db/log_reader.cc +11 -11
- package/deps/rocksdb/rocksdb/db/merge_helper.h +1 -1
- package/deps/rocksdb/rocksdb/db/multi_scan.cc +70 -0
- package/deps/rocksdb/rocksdb/db/version_set.cc +15 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +4 -0
- package/deps/rocksdb/rocksdb/env/composite_env.cc +4 -0
- package/deps/rocksdb/rocksdb/env/env.cc +4 -0
- package/deps/rocksdb/rocksdb/env/env_encryption.cc +38 -3
- package/deps/rocksdb/rocksdb/env/env_test.cc +36 -1
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +20 -4
- package/deps/rocksdb/rocksdb/env/io_posix.cc +16 -0
- package/deps/rocksdb/rocksdb/env/io_posix.h +3 -0
- package/deps/rocksdb/rocksdb/env/mock_env.cc +5 -0
- package/deps/rocksdb/rocksdb/file/readahead_raf.cc +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +33 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +25 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +10 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +12 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +12 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +29 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +26 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +142 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/options/options_helper.h +3 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +2 -0
- package/deps/rocksdb/rocksdb/port/win/io_win.cc +20 -0
- package/deps/rocksdb/rocksdb/port/win/io_win.h +4 -0
- package/deps/rocksdb/rocksdb/src.mk +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +31 -34
- package/deps/rocksdb/rocksdb/table/block_based/block.h +2 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +43 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +367 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +69 -23
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +54 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +27 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +167 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +6 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +12 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_type.h +1 -0
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +0 -3
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +10 -7
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +244 -0
- package/deps/rocksdb/rocksdb/table/external_table.cc +1 -1
- package/deps/rocksdb/rocksdb/table/format.cc +51 -33
- package/deps/rocksdb/rocksdb/table/format.h +1 -1
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +13 -8
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +1 -3
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +5 -1
- package/deps/rocksdb/rocksdb/table/table_test.cc +629 -1
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +0 -1
- package/deps/rocksdb/rocksdb/test_util/testutil.h +5 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +183 -94
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +71 -0
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +37 -22
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +308 -0
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +189 -0
- package/deps/rocksdb/rocksdb/util/cast_util.h +22 -11
- package/deps/rocksdb/rocksdb/util/coding.h +4 -3
- package/deps/rocksdb/rocksdb/util/compression.cc +2 -0
- package/deps/rocksdb/rocksdb/util/compression.h +16 -6
- package/deps/rocksdb/rocksdb/util/compression_test.cc +1679 -15
- package/deps/rocksdb/rocksdb/util/stop_watch.h +17 -7
- package/deps/rocksdb/rocksdb/util/timer_queue_test.cc +17 -3
- package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +10 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +5 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +2 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +18 -2
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +22 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +5 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +22 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +15 -4
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +61 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +18 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +3 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +9 -3
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +9 -0
- package/deps/rocksdb/rocksdb.gyp +15 -1
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/deps/rocksdb/rocksdb/util/auto_skip_compressor.cc +0 -131
- package/deps/rocksdb/rocksdb/util/auto_skip_compressor.h +0 -90
|
@@ -57,6 +57,7 @@ struct WaitForCompactOptions;
|
|
|
57
57
|
class Env;
|
|
58
58
|
class EventListener;
|
|
59
59
|
class FileSystem;
|
|
60
|
+
class MultiScan;
|
|
60
61
|
class Replayer;
|
|
61
62
|
class StatsHistoryIterator;
|
|
62
63
|
class TraceReader;
|
|
@@ -1092,7 +1093,30 @@ class DB {
|
|
|
1092
1093
|
|
|
1093
1094
|
// Get an iterator that scans multiple key ranges. The scan ranges should
|
|
1094
1095
|
// be in increasing order of start key. See multi_scan.h for more
|
|
1095
|
-
// details.
|
|
1096
|
+
// details. For optimal performance, ensure that either all entries in
|
|
1097
|
+
// scan_opts specify the range limit, or none of them do.
|
|
1098
|
+
//
|
|
1099
|
+
// NOTE: iterate_upper_bound in ReadOptions will be ignored. Instead, the
|
|
1100
|
+
// range.limit in ScanOptions is consulted to determine the upper bound key,
|
|
1101
|
+
// if specified.
|
|
1102
|
+
//
|
|
1103
|
+
// Example usage -
|
|
1104
|
+
// std::vector<ScanOptions> scans{{.start = Slice("bar")},
|
|
1105
|
+
// {.start = Slice("foo")}};
|
|
1106
|
+
// std::unique_ptr<MultiScan> iter =
|
|
1107
|
+
// db->NewMultiScan(read_options, column_family, scans);
|
|
1108
|
+
// try {
|
|
1109
|
+
// for (auto scan : *iter) {
|
|
1110
|
+
// for (auto it : scan) {
|
|
1111
|
+
// // Do something with key - it.first
|
|
1112
|
+
// // Do something with value - it.second
|
|
1113
|
+
// }
|
|
1114
|
+
// }
|
|
1115
|
+
// } catch (MultiScanException& ex) {
|
|
1116
|
+
// // Check ex.status()
|
|
1117
|
+
// } catch (std::logic_error& ex) {
|
|
1118
|
+
// // Check ex.what()
|
|
1119
|
+
// }
|
|
1096
1120
|
virtual std::unique_ptr<MultiScan> NewMultiScan(
|
|
1097
1121
|
const ReadOptions& /*options*/, ColumnFamilyHandle* /*column_family*/,
|
|
1098
1122
|
const std::vector<ScanOptions>& /*scan_opts*/) {
|
|
@@ -866,6 +866,13 @@ class RandomAccessFile {
|
|
|
866
866
|
"RandomAccessFile::InvalidateCache not supported.");
|
|
867
867
|
}
|
|
868
868
|
|
|
869
|
+
// The default implementation returns "not supported" so that user
|
|
870
|
+
// implementations of RandomAccessFile do not need to immediately implement
|
|
871
|
+
// this function.
|
|
872
|
+
virtual Status GetFileSize(uint64_t* /*result*/) {
|
|
873
|
+
return Status::NotSupported("RandomAccessFile::GetFileSize not supported.");
|
|
874
|
+
}
|
|
875
|
+
|
|
869
876
|
// If you're adding methods here, remember to add them to
|
|
870
877
|
// RandomAccessFileWrapper too.
|
|
871
878
|
};
|
|
@@ -1750,6 +1757,9 @@ class RandomAccessFileWrapper : public RandomAccessFile {
|
|
|
1750
1757
|
Status InvalidateCache(size_t offset, size_t length) override {
|
|
1751
1758
|
return target_->InvalidateCache(offset, length);
|
|
1752
1759
|
}
|
|
1760
|
+
Status GetFileSize(uint64_t* file_size) override {
|
|
1761
|
+
return target_->GetFileSize(file_size);
|
|
1762
|
+
}
|
|
1753
1763
|
|
|
1754
1764
|
private:
|
|
1755
1765
|
RandomAccessFile* target_;
|
|
@@ -240,6 +240,15 @@ class EncryptedRandomAccessFile : public FSRandomAccessFile {
|
|
|
240
240
|
size_t GetRequiredBufferAlignment() const override;
|
|
241
241
|
|
|
242
242
|
IOStatus InvalidateCache(size_t offset, size_t length) override;
|
|
243
|
+
|
|
244
|
+
// Intentionally leave GetFileSize not overridden here, so that it inherits
|
|
245
|
+
// the default implementation from its parent class, which is Not Supported.
|
|
246
|
+
//
|
|
247
|
+
// As GetFileSize API is not required to be implemented yet, we use encrypted
|
|
248
|
+
// file system in unit test to validate the rest of the system could continue
|
|
249
|
+
// working with the Not Supported behavior.
|
|
250
|
+
//
|
|
251
|
+
// IOStatus GetFileSize(uint64_t* /*result*/) override;
|
|
243
252
|
};
|
|
244
253
|
|
|
245
254
|
class EncryptedWritableFile : public FSWritableFile {
|
|
@@ -1051,6 +1051,14 @@ class FSRandomAccessFile {
|
|
|
1051
1051
|
// open.
|
|
1052
1052
|
virtual Temperature GetTemperature() const { return Temperature::kUnknown; }
|
|
1053
1053
|
|
|
1054
|
+
// Get the file size on an open-for-reading file without re-seeking the file's
|
|
1055
|
+
// path in the filesystem. The default implementation returns "not supported"
|
|
1056
|
+
// so that user implementations of FSRandomAccessFile do not need to
|
|
1057
|
+
// immediately implement this function.
|
|
1058
|
+
virtual IOStatus GetFileSize(uint64_t* /*result*/) {
|
|
1059
|
+
return IOStatus::NotSupported("GetFileSize Not Supported");
|
|
1060
|
+
}
|
|
1061
|
+
|
|
1054
1062
|
// If you're adding methods here, remember to add them to
|
|
1055
1063
|
// FSRandomAccessFileWrapper too.
|
|
1056
1064
|
};
|
|
@@ -1772,6 +1780,10 @@ class FSRandomAccessFileWrapper : public FSRandomAccessFile {
|
|
|
1772
1780
|
return target_->GetTemperature();
|
|
1773
1781
|
}
|
|
1774
1782
|
|
|
1783
|
+
virtual IOStatus GetFileSize(uint64_t* result) override {
|
|
1784
|
+
return target_->GetFileSize(result);
|
|
1785
|
+
}
|
|
1786
|
+
|
|
1775
1787
|
private:
|
|
1776
1788
|
std::unique_ptr<FSRandomAccessFile> guard_;
|
|
1777
1789
|
FSRandomAccessFile* target_;
|
|
@@ -95,14 +95,18 @@ class Iterator : public IteratorBase {
|
|
|
95
95
|
return Slice();
|
|
96
96
|
}
|
|
97
97
|
|
|
98
|
-
//
|
|
99
|
-
//
|
|
100
|
-
//
|
|
101
|
-
//
|
|
102
|
-
//
|
|
103
|
-
//
|
|
104
|
-
//
|
|
105
|
-
//
|
|
98
|
+
// Prepare the iterator to scan the ranges specified in scan_opts. This
|
|
99
|
+
// includes prefetching relevant blocks from disk. The upper bound and
|
|
100
|
+
// other table specific limits should be specified for each
|
|
101
|
+
// scan for best results. If an upper bound is not specified, Prepare may
|
|
102
|
+
// skip prefetching as it cannot accurately determine how much to prefetch.
|
|
103
|
+
//
|
|
104
|
+
// Prepare should typically be followed by Seeks to the start keys in the
|
|
105
|
+
// order they're specified in scan_opts. If the user does a Seek to some
|
|
106
|
+
// other target key, the iterator should disregard the scan_opts from that
|
|
107
|
+
// point onwards and behave like a normal iterator. It's the user's
|
|
108
|
+
// responsibility to again call Prepare().
|
|
109
|
+
//
|
|
106
110
|
// If Prepare() is called, it overrides the iterate_upper_bound in
|
|
107
111
|
// ReadOptions
|
|
108
112
|
virtual void Prepare(const std::vector<ScanOptions>& /*scan_opts*/) {}
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
#pragma once
|
|
7
7
|
|
|
8
|
+
#include "rocksdb/db.h"
|
|
8
9
|
#include "rocksdb/iterator.h"
|
|
9
10
|
#include "rocksdb/options.h"
|
|
10
11
|
|
|
@@ -72,6 +73,8 @@ class Scan {
|
|
|
72
73
|
|
|
73
74
|
explicit Scan(Iterator* db_iter) : db_iter_(db_iter) {}
|
|
74
75
|
|
|
76
|
+
void Reset(Iterator* db_iter) { db_iter_ = db_iter; }
|
|
77
|
+
|
|
75
78
|
ScanIterator begin() { return ScanIterator(db_iter_); }
|
|
76
79
|
|
|
77
80
|
std::nullptr_t end() { return nullptr; }
|
|
@@ -149,9 +152,9 @@ class Scan {
|
|
|
149
152
|
// A Status exception is thrown if there is an error.
|
|
150
153
|
class MultiScan {
|
|
151
154
|
public:
|
|
152
|
-
MultiScan(const
|
|
153
|
-
std::
|
|
154
|
-
|
|
155
|
+
MultiScan(const ReadOptions& read_options,
|
|
156
|
+
const std::vector<ScanOptions>& scan_opts, DB* db,
|
|
157
|
+
ColumnFamilyHandle* cfh);
|
|
155
158
|
|
|
156
159
|
explicit MultiScan(std::unique_ptr<Iterator>&& db_iter)
|
|
157
160
|
: db_iter_(std::move(db_iter)) {}
|
|
@@ -168,9 +171,17 @@ class MultiScan {
|
|
|
168
171
|
using difference_type = int;
|
|
169
172
|
using iterator_category = std::input_iterator_tag;
|
|
170
173
|
|
|
171
|
-
MultiScanIterator(const std::vector<ScanOptions>& scan_opts,
|
|
172
|
-
|
|
173
|
-
|
|
174
|
+
MultiScanIterator(const std::vector<ScanOptions>& scan_opts, DB* db,
|
|
175
|
+
ColumnFamilyHandle* cfh, ReadOptions& read_options,
|
|
176
|
+
Slice* upper_bound, std::unique_ptr<Iterator>& db_iter)
|
|
177
|
+
: scan_opts_(scan_opts),
|
|
178
|
+
db_(db),
|
|
179
|
+
cfh_(cfh),
|
|
180
|
+
read_options_(read_options),
|
|
181
|
+
upper_bound_(upper_bound),
|
|
182
|
+
idx_(0),
|
|
183
|
+
db_iter_(db_iter),
|
|
184
|
+
scan_(db_iter_.get()) {
|
|
174
185
|
if (scan_opts_.empty()) {
|
|
175
186
|
throw std::logic_error("Zero scans in multi-scan");
|
|
176
187
|
}
|
|
@@ -181,28 +192,9 @@ class MultiScan {
|
|
|
181
192
|
}
|
|
182
193
|
}
|
|
183
194
|
|
|
184
|
-
explicit MultiScanIterator(const std::vector<ScanOptions>& scan_opts)
|
|
185
|
-
: scan_opts_(scan_opts),
|
|
186
|
-
idx_(scan_opts_.size()),
|
|
187
|
-
db_iter_(nullptr),
|
|
188
|
-
scan_(nullptr) {}
|
|
189
|
-
|
|
190
195
|
~MultiScanIterator() { assert(status_.ok()); }
|
|
191
196
|
|
|
192
|
-
MultiScanIterator& operator++()
|
|
193
|
-
if (idx_ >= scan_opts_.size()) {
|
|
194
|
-
throw std::logic_error("Index out of range");
|
|
195
|
-
}
|
|
196
|
-
idx_++;
|
|
197
|
-
if (idx_ < scan_opts_.size()) {
|
|
198
|
-
db_iter_->Seek(*scan_opts_[idx_].range.start);
|
|
199
|
-
status_ = db_iter_->status();
|
|
200
|
-
if (!status_.ok()) {
|
|
201
|
-
throw MultiScanException(status_);
|
|
202
|
-
}
|
|
203
|
-
}
|
|
204
|
-
return *this;
|
|
205
|
-
}
|
|
197
|
+
MultiScanIterator& operator++();
|
|
206
198
|
|
|
207
199
|
bool operator==(std::nullptr_t /*other*/) const {
|
|
208
200
|
return idx_ >= scan_opts_.size();
|
|
@@ -217,20 +209,29 @@ class MultiScan {
|
|
|
217
209
|
|
|
218
210
|
private:
|
|
219
211
|
const std::vector<ScanOptions>& scan_opts_;
|
|
212
|
+
DB* db_;
|
|
213
|
+
ColumnFamilyHandle* cfh_;
|
|
214
|
+
ReadOptions& read_options_;
|
|
215
|
+
Slice* upper_bound_;
|
|
220
216
|
size_t idx_;
|
|
221
|
-
Iterator
|
|
217
|
+
std::unique_ptr<Iterator>& db_iter_;
|
|
222
218
|
Status status_;
|
|
223
219
|
Scan scan_;
|
|
224
220
|
};
|
|
225
221
|
|
|
226
222
|
MultiScanIterator begin() {
|
|
227
|
-
return MultiScanIterator(scan_opts_,
|
|
223
|
+
return MultiScanIterator(scan_opts_, db_, cfh_, read_options_,
|
|
224
|
+
&upper_bound_, db_iter_);
|
|
228
225
|
}
|
|
229
226
|
|
|
230
227
|
std::nullptr_t end() { return nullptr; }
|
|
231
228
|
|
|
232
229
|
private:
|
|
230
|
+
ReadOptions read_options_;
|
|
233
231
|
const std::vector<ScanOptions> scan_opts_;
|
|
232
|
+
DB* db_;
|
|
233
|
+
ColumnFamilyHandle* cfh_;
|
|
234
|
+
Slice upper_bound_;
|
|
234
235
|
std::unique_ptr<Iterator> db_iter_;
|
|
235
236
|
};
|
|
236
237
|
|
|
@@ -57,6 +57,7 @@ class Statistics;
|
|
|
57
57
|
class InternalKeyComparator;
|
|
58
58
|
class WalFilter;
|
|
59
59
|
class FileSystem;
|
|
60
|
+
class UserDefinedIndexFactory;
|
|
60
61
|
|
|
61
62
|
struct Options;
|
|
62
63
|
struct DbPath;
|
|
@@ -608,6 +609,13 @@ struct DBOptions {
|
|
|
608
609
|
// checksums. True also enters a read-only mode when a DB write fails;
|
|
609
610
|
// see DB::Resume().
|
|
610
611
|
//
|
|
612
|
+
// When set to true, the DB will fail to open if any SST files fail to open
|
|
613
|
+
// e.g. due to incorrect file size or corrupted footer.
|
|
614
|
+
//
|
|
615
|
+
// When set to false and some files are corrupted, the DB will still be
|
|
616
|
+
// opened, and the healthy ones could still be accessed, while corrupted ones
|
|
617
|
+
// will not.
|
|
618
|
+
//
|
|
611
619
|
// As most workloads value data correctness over availability, this option
|
|
612
620
|
// is on by default. Note that the name of this old option is potentially
|
|
613
621
|
// misleading, and other options and operations go further in proactive
|
|
@@ -1297,12 +1305,13 @@ struct DBOptions {
|
|
|
1297
1305
|
// Default: false
|
|
1298
1306
|
bool skip_stats_update_on_db_open = false;
|
|
1299
1307
|
|
|
1300
|
-
//
|
|
1301
|
-
//
|
|
1302
|
-
//
|
|
1303
|
-
//
|
|
1304
|
-
//
|
|
1305
|
-
// not
|
|
1308
|
+
// This option is deprecated and marked as no-op. Kept for backward
|
|
1309
|
+
// compatibility until usage is fully removed.
|
|
1310
|
+
// File size check will be performed through a thread
|
|
1311
|
+
// pool during DB Open, when max_open_files is set to -1.
|
|
1312
|
+
// Therefore, the concern of DB Open slowness is eliminated.
|
|
1313
|
+
// Note that when max_open_files is not set to -1, only a subset of files will
|
|
1314
|
+
// be opened and checked during DB Open.
|
|
1306
1315
|
//
|
|
1307
1316
|
// Default: false
|
|
1308
1317
|
bool skip_checking_sst_file_sizes_on_db_open = false;
|
|
@@ -2061,6 +2070,17 @@ struct ReadOptions {
|
|
|
2061
2070
|
// Default: false
|
|
2062
2071
|
bool auto_refresh_iterator_with_snapshot = false;
|
|
2063
2072
|
|
|
2073
|
+
// EXPERIMENTAL
|
|
2074
|
+
//
|
|
2075
|
+
// Specify an alternate index to use in the SST files instead of the native
|
|
2076
|
+
// block based table index. The table_factory used for the column family
|
|
2077
|
+
// must support building/reading this index.
|
|
2078
|
+
//
|
|
2079
|
+
// Currently, only forward scans are supported. For forward scans, only Seek()
|
|
2080
|
+
// is supported. SeekToFirst() is not supported. If the caller wishes to scan
|
|
2081
|
+
// from start to end, the native index must be used.
|
|
2082
|
+
const UserDefinedIndexFactory* table_index_factory = nullptr;
|
|
2083
|
+
|
|
2064
2084
|
// *** END options only relevant to iterators or scans ***
|
|
2065
2085
|
|
|
2066
2086
|
// *** BEGIN options for RocksDB internal use only ***
|
|
@@ -44,6 +44,7 @@ class TableReader;
|
|
|
44
44
|
class WritableFileWriter;
|
|
45
45
|
struct ConfigOptions;
|
|
46
46
|
struct EnvOptions;
|
|
47
|
+
class UserDefinedIndexFactory;
|
|
47
48
|
|
|
48
49
|
// Types of checksums to use for checking integrity of logical blocks within
|
|
49
50
|
// files. All checksums currently use 32 bits of checking power (1 in 4B
|
|
@@ -492,8 +493,16 @@ struct BlockBasedTableOptions {
|
|
|
492
493
|
// Because filters only impact performance and are not data-critical, an
|
|
493
494
|
// SST file can be opened and used without filters if (a) the filter
|
|
494
495
|
// policy name or schema is unrecognized, or (b) filter_policy is nullptr.
|
|
496
|
+
// See filter_policy regarding filters.
|
|
495
497
|
std::shared_ptr<const FilterPolicy> filter_policy = nullptr;
|
|
496
498
|
|
|
499
|
+
// EXPERIMENTAL
|
|
500
|
+
//
|
|
501
|
+
// If non-nullptr, use the specified factory to build user-defined index.
|
|
502
|
+
// This allows users to define their own index format and build the index
|
|
503
|
+
// during table building.
|
|
504
|
+
std::shared_ptr<UserDefinedIndexFactory> user_defined_index_factory = nullptr;
|
|
505
|
+
|
|
497
506
|
// If true, place whole keys in the filter (not just prefixes).
|
|
498
507
|
// This must generally be true for gets to be efficient.
|
|
499
508
|
bool whole_key_filtering = true;
|
|
@@ -69,6 +69,7 @@ class ToolHooks {
|
|
|
69
69
|
virtual Status Open(const Options& options,
|
|
70
70
|
const blob_db::BlobDBOptions& bdb_options,
|
|
71
71
|
const std::string& dbname, blob_db::BlobDB** blob_db) = 0;
|
|
72
|
+
virtual void Exit(int status) = 0;
|
|
72
73
|
};
|
|
73
74
|
|
|
74
75
|
class DefaultHooks : public ToolHooks {
|
|
@@ -117,6 +118,8 @@ class DefaultHooks : public ToolHooks {
|
|
|
117
118
|
const blob_db::BlobDBOptions& bdb_options,
|
|
118
119
|
const std::string& dbname,
|
|
119
120
|
blob_db::BlobDB** blob_db) override;
|
|
121
|
+
|
|
122
|
+
virtual void Exit(int status) override { exit(status); }
|
|
120
123
|
};
|
|
121
124
|
|
|
122
125
|
extern DefaultHooks defaultHooks;
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// This source code is licensed under both the GPLv2 (found in the
|
|
3
|
+
// COPYING file in the root directory) and Apache 2.0 License
|
|
4
|
+
// (found in the LICENSE.Apache file in the root directory).
|
|
5
|
+
//
|
|
6
|
+
// *****************************************************************
|
|
7
|
+
// EXPERIMENTAL - subject to change while under development
|
|
8
|
+
// *****************************************************************
|
|
9
|
+
|
|
10
|
+
#pragma once
|
|
11
|
+
|
|
12
|
+
#include <string>
|
|
13
|
+
|
|
14
|
+
#include "rocksdb/advanced_iterator.h"
|
|
15
|
+
#include "rocksdb/customizable.h"
|
|
16
|
+
#include "rocksdb/options.h"
|
|
17
|
+
#include "rocksdb/slice.h"
|
|
18
|
+
#include "rocksdb/status.h"
|
|
19
|
+
|
|
20
|
+
namespace ROCKSDB_NAMESPACE {
|
|
21
|
+
|
|
22
|
+
// Prefix for user-defined index block names
|
|
23
|
+
inline const std::string kUserDefinedIndexPrefix =
|
|
24
|
+
"rocksdb.user_defined_index.";
|
|
25
|
+
|
|
26
|
+
// This is a public API for user-defined index builders.
|
|
27
|
+
// It allows users to define their own index format and build custom
|
|
28
|
+
// indexes during table building. Currently, only a monolithic index
|
|
29
|
+
// block is supported (no partitioned index).
|
|
30
|
+
|
|
31
|
+
// The interface for building user-defined index.
|
|
32
|
+
class UserDefinedIndexBuilder {
|
|
33
|
+
public:
|
|
34
|
+
// Right now, we only support Puts. In the future, we may support merges,
|
|
35
|
+
// deletions etc.
|
|
36
|
+
enum ValueType {
|
|
37
|
+
kValue,
|
|
38
|
+
kTypeMax,
|
|
39
|
+
};
|
|
40
|
+
|
|
41
|
+
// File offset and size of the data block
|
|
42
|
+
struct BlockHandle {
|
|
43
|
+
uint64_t offset;
|
|
44
|
+
uint64_t size;
|
|
45
|
+
};
|
|
46
|
+
|
|
47
|
+
virtual ~UserDefinedIndexBuilder() = default;
|
|
48
|
+
|
|
49
|
+
// Add a new index entry to index block. The key for the new index entry
|
|
50
|
+
// should be >= last_key_in_current_block and < first_key_in_next_block.
|
|
51
|
+
// The previous index entry key and the new index entry key cover
|
|
52
|
+
// all the keys in the data block associated with the new index entry.
|
|
53
|
+
//
|
|
54
|
+
// Called before the OnKeyAdded() call for first_key_in_next_block.
|
|
55
|
+
// @last_key_in_current_block: The last key in the current data block
|
|
56
|
+
// @first_key_in_next_block: it will be nullptr if the entry being added is
|
|
57
|
+
// the last one in the table
|
|
58
|
+
// @block_handle: offset/size of the data block referenced by this index
|
|
59
|
+
// entry. This should be stored along with the index entry
|
|
60
|
+
// key
|
|
61
|
+
// @separator_scratch: a scratch buffer to back a computed separator between
|
|
62
|
+
// those, as needed. May be modified on each call.
|
|
63
|
+
// @return: the key or separator stored in the index, which could be
|
|
64
|
+
// last_key_in_current_block or a computed separator backed by
|
|
65
|
+
// separator_scratch.
|
|
66
|
+
virtual Slice AddIndexEntry(const Slice& last_key_in_current_block,
|
|
67
|
+
const Slice* first_key_in_next_block,
|
|
68
|
+
const BlockHandle& block_handle,
|
|
69
|
+
std::string* separator_scratch) = 0;
|
|
70
|
+
|
|
71
|
+
// This method will be called whenever a key is added. The subclasses may
|
|
72
|
+
// override OnKeyAdded() if they need to collect additional information.
|
|
73
|
+
// The type argument indicates whether the value is a full value or partial.
|
|
74
|
+
// At the moment, only full values are supported.
|
|
75
|
+
virtual void OnKeyAdded(const Slice& /*key*/, ValueType /*type*/,
|
|
76
|
+
const Slice& /*value*/) {}
|
|
77
|
+
|
|
78
|
+
// Finish building the index.
|
|
79
|
+
// Returns a Status and the serialized index contents.
|
|
80
|
+
// The memory backing the contents should not be freed until this builder
|
|
81
|
+
// object is destructed.
|
|
82
|
+
virtual Status Finish(Slice* index_contents) = 0;
|
|
83
|
+
};
|
|
84
|
+
|
|
85
|
+
// The interface for iterating the user defined index. This will be
|
|
86
|
+
// instantiated and used by a scan to iterate through the index entries
|
|
87
|
+
// covered by the scan.
|
|
88
|
+
class UserDefinedIndexIterator {
|
|
89
|
+
public:
|
|
90
|
+
virtual ~UserDefinedIndexIterator() = default;
|
|
91
|
+
|
|
92
|
+
// Prepare the iterator for a series of scans. The iterator should use
|
|
93
|
+
// this as an opportunity to do any prefetching and buffering of results.
|
|
94
|
+
virtual void Prepare(const ScanOptions scan_opts[], size_t num_opts) = 0;
|
|
95
|
+
|
|
96
|
+
// Given the target key, position the index iterator at the index entry
|
|
97
|
+
// with the smallest key >= target. The result must be updated with the
|
|
98
|
+
// index key, and the bound_check_result. The bound_check_result should
|
|
99
|
+
// be set to kOutOfBound if no block satisfies the target key and
|
|
100
|
+
// termination criteria, kInbound if the data block is definitely fully
|
|
101
|
+
// within bounds, or kUnknown if the data block could be partially
|
|
102
|
+
// within bounds.
|
|
103
|
+
virtual Status SeekAndGetResult(const Slice& target,
|
|
104
|
+
IterateResult* result) = 0;
|
|
105
|
+
|
|
106
|
+
// Advance to the next index entry. The result must be populated similar
|
|
107
|
+
// to SeekAndGetResult.
|
|
108
|
+
virtual Status NextAndGetResult(IterateResult* result) = 0;
|
|
109
|
+
|
|
110
|
+
// Return the BlockHandle in the current index entry
|
|
111
|
+
virtual UserDefinedIndexBuilder::BlockHandle value() = 0;
|
|
112
|
+
};
|
|
113
|
+
|
|
114
|
+
// A reader interface for the user defined index
|
|
115
|
+
class UserDefinedIndexReader {
|
|
116
|
+
public:
|
|
117
|
+
virtual ~UserDefinedIndexReader() = default;
|
|
118
|
+
|
|
119
|
+
// Allocate an iterator that will be used by RocksDB to perform scans
|
|
120
|
+
virtual std::unique_ptr<UserDefinedIndexIterator> NewIterator(
|
|
121
|
+
const ReadOptions& read_options) = 0;
|
|
122
|
+
|
|
123
|
+
// The memory usage of the index, including the size of the raw contents and
|
|
124
|
+
// any other heap data structures allocated by the reader
|
|
125
|
+
virtual size_t ApproximateMemoryUsage() const = 0;
|
|
126
|
+
};
|
|
127
|
+
|
|
128
|
+
// Factory for creating user-defined index builders.
|
|
129
|
+
class UserDefinedIndexFactory : public Customizable {
|
|
130
|
+
public:
|
|
131
|
+
virtual ~UserDefinedIndexFactory() = default;
|
|
132
|
+
|
|
133
|
+
// Create a new builder for user-defined index.
|
|
134
|
+
virtual UserDefinedIndexBuilder* NewBuilder() const = 0;
|
|
135
|
+
|
|
136
|
+
// Create a new user defined index reader given the contents of the index
|
|
137
|
+
// block
|
|
138
|
+
virtual std::unique_ptr<UserDefinedIndexReader> NewReader(
|
|
139
|
+
Slice& index_block) const = 0;
|
|
140
|
+
};
|
|
141
|
+
|
|
142
|
+
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -396,9 +396,9 @@ struct TransactionOptions {
|
|
|
396
396
|
// due to too many memtables.
|
|
397
397
|
// Note that the ingestion relies on the transaction's underlying index,
|
|
398
398
|
// (WriteBatchWithIndex), so updates that are added to the transaction
|
|
399
|
-
// without indexing (e.
|
|
399
|
+
// without indexing (i.e. added directly to the transaction underlying
|
|
400
400
|
// write batch through Transaction::GetWriteBatch()->GetWriteBatch())
|
|
401
|
-
// are not supported
|
|
401
|
+
// are not supported, and the optimization will not apply in that case.
|
|
402
402
|
//
|
|
403
403
|
// NOTE: since WBWI keeps track of the most recent update per key, a Put
|
|
404
404
|
// followed by a SingleDelete will be written to DB as a SingleDelete. This
|
|
@@ -379,6 +379,8 @@ class WriteBatchWithIndex : public WriteBatchBase {
|
|
|
379
379
|
};
|
|
380
380
|
const std::unordered_map<uint32_t, CFStat>& GetCFStats() const;
|
|
381
381
|
|
|
382
|
+
// The total number of operations issued into this WBWI.
|
|
383
|
+
size_t GetWBWIOpCount() const;
|
|
382
384
|
bool GetOverwriteKey() const;
|
|
383
385
|
|
|
384
386
|
private:
|
|
@@ -12,8 +12,8 @@
|
|
|
12
12
|
// NOTE: in 'main' development branch, this should be the *next*
|
|
13
13
|
// minor or major version number planned for release.
|
|
14
14
|
#define ROCKSDB_MAJOR 10
|
|
15
|
-
#define ROCKSDB_MINOR
|
|
16
|
-
#define ROCKSDB_PATCH
|
|
15
|
+
#define ROCKSDB_MINOR 5
|
|
16
|
+
#define ROCKSDB_PATCH 1
|
|
17
17
|
|
|
18
18
|
// Do not use these. We made the mistake of declaring macros starting with
|
|
19
19
|
// double underscore. Now we have to live with our choice. We'll deprecate these
|
|
@@ -72,6 +72,9 @@ std::unique_ptr<Configurable> CFOptionsAsConfigurable(
|
|
|
72
72
|
Status StringToMap(const std::string& opts_str,
|
|
73
73
|
std::unordered_map<std::string, std::string>* opts_map);
|
|
74
74
|
|
|
75
|
+
Status GetStringFromCompressionType(std::string* compression_str,
|
|
76
|
+
CompressionType compression_type);
|
|
77
|
+
|
|
75
78
|
struct OptionsHelper {
|
|
76
79
|
static const std::string kCFOptionsName /*= "ColumnFamilyOptions"*/;
|
|
77
80
|
static const std::string kDBOptionsName /*= "DBOptions" */;
|
|
@@ -129,6 +129,8 @@ TEST_F(OptionsSettableTest, BlockBasedTableOptionsAllFieldsSettable) {
|
|
|
129
129
|
sizeof(CacheUsageOptions)},
|
|
130
130
|
{offsetof(struct BlockBasedTableOptions, filter_policy),
|
|
131
131
|
sizeof(std::shared_ptr<const FilterPolicy>)},
|
|
132
|
+
{offsetof(struct BlockBasedTableOptions, user_defined_index_factory),
|
|
133
|
+
sizeof(std::shared_ptr<UserDefinedIndexFactory>)},
|
|
132
134
|
};
|
|
133
135
|
|
|
134
136
|
// In this test, we catch a new option of BlockBasedTableOptions that is not
|
|
@@ -242,6 +242,16 @@ size_t WinMmapReadableFile::GetUniqueId(char* id, size_t max_size) const {
|
|
|
242
242
|
return GetUniqueIdFromFile(hFile_, id, max_size);
|
|
243
243
|
}
|
|
244
244
|
|
|
245
|
+
IOStatus WinMmapReadableFile::GetFileSize(uint64_t* size) {
|
|
246
|
+
LARGE_INTEGER fileSize;
|
|
247
|
+
if (GetFileSizeEx(hFile_, &fileSize)) {
|
|
248
|
+
*size = fileSize.QuadPart;
|
|
249
|
+
return IOStatus::OK();
|
|
250
|
+
} else {
|
|
251
|
+
return IOStatus::IOError("Failed to get file size", filename_);
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
|
|
245
255
|
///////////////////////////////////////////////////////////////////////////////
|
|
246
256
|
/// WinMmapFile
|
|
247
257
|
|
|
@@ -735,6 +745,16 @@ size_t WinRandomAccessFile::GetRequiredBufferAlignment() const {
|
|
|
735
745
|
return GetAlignment();
|
|
736
746
|
}
|
|
737
747
|
|
|
748
|
+
IOStatus WinRandomAccessFile::GetFileSize(uint64_t* size) {
|
|
749
|
+
LARGE_INTEGER fileSize;
|
|
750
|
+
if (GetFileSizeEx(hFile_, &fileSize)) {
|
|
751
|
+
*size = fileSize.QuadPart;
|
|
752
|
+
return IOStatus::OK();
|
|
753
|
+
} else {
|
|
754
|
+
return IOStatus::IOError("Failed to get file size", filename_);
|
|
755
|
+
}
|
|
756
|
+
}
|
|
757
|
+
|
|
738
758
|
/////////////////////////////////////////////////////////////////////////////
|
|
739
759
|
// WinWritableImpl
|
|
740
760
|
//
|
|
@@ -152,6 +152,8 @@ class WinMmapReadableFile : private WinFileData, public FSRandomAccessFile {
|
|
|
152
152
|
IOStatus InvalidateCache(size_t offset, size_t length) override;
|
|
153
153
|
|
|
154
154
|
size_t GetUniqueId(char* id, size_t max_size) const override;
|
|
155
|
+
|
|
156
|
+
IOStatus GetFileSize(uint64_t* file_size) override;
|
|
155
157
|
};
|
|
156
158
|
|
|
157
159
|
// We preallocate and use memcpy to append new
|
|
@@ -292,6 +294,8 @@ class WinRandomAccessFile
|
|
|
292
294
|
IOStatus InvalidateCache(size_t offset, size_t length) override;
|
|
293
295
|
|
|
294
296
|
size_t GetRequiredBufferAlignment() const override;
|
|
297
|
+
|
|
298
|
+
IOStatus GetFileSize(uint64_t* file_size) override;
|
|
295
299
|
};
|
|
296
300
|
|
|
297
301
|
// This is a sequential write class. It has been mimicked (as others) after
|
|
@@ -80,6 +80,7 @@ LIB_SOURCES = \
|
|
|
80
80
|
db/memtable_list.cc \
|
|
81
81
|
db/merge_helper.cc \
|
|
82
82
|
db/merge_operator.cc \
|
|
83
|
+
db/multi_scan.cc \
|
|
83
84
|
db/output_validator.cc \
|
|
84
85
|
db/periodic_task_scheduler.cc \
|
|
85
86
|
db/range_del_aggregator.cc \
|
|
@@ -237,13 +238,13 @@ LIB_SOURCES = \
|
|
|
237
238
|
trace_replay/block_cache_tracer.cc \
|
|
238
239
|
trace_replay/io_tracer.cc \
|
|
239
240
|
util/async_file_reader.cc \
|
|
241
|
+
util/auto_tune_compressor.cc \
|
|
240
242
|
util/build_version.cc \
|
|
241
243
|
util/cleanable.cc \
|
|
242
244
|
util/coding.cc \
|
|
243
245
|
util/compaction_job_stats_impl.cc \
|
|
244
246
|
util/comparator.cc \
|
|
245
247
|
util/compression.cc \
|
|
246
|
-
util/auto_skip_compressor.cc \
|
|
247
248
|
util/compression_context_cache.cc \
|
|
248
249
|
util/concurrent_task_limiter_impl.cc \
|
|
249
250
|
util/crc32c.cc \
|